Revert "kvm: support TSC deadline MSR"
[qemu/cris-port.git] / exec.c
blob9dc4edbf61f32bb3c7b9521947c66bcf08798e65
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 //#define DEBUG_TB_INVALIDATE
61 //#define DEBUG_FLUSH
62 //#define DEBUG_TLB
63 //#define DEBUG_UNASSIGNED
65 /* make various TB consistency checks */
66 //#define DEBUG_TB_CHECK
67 //#define DEBUG_TLB_CHECK
69 //#define DEBUG_IOPORT
70 //#define DEBUG_SUBPAGE
72 #if !defined(CONFIG_USER_ONLY)
73 /* TB consistency checks only implemented for usermode emulation. */
74 #undef DEBUG_TB_CHECK
75 #endif
77 #define SMC_BITMAP_USE_THRESHOLD 10
/* Array of translation block descriptors; code_gen_alloc() sizes it to
   code_gen_max_blocks entries and tb_alloc() hands them out in order. */
79 static TranslationBlock *tbs;
/* Capacity of 'tbs', derived from the code buffer size. */
80 static int code_gen_max_blocks;
/* Hash buckets of TBs; entries in a bucket are chained through
   'phys_hash_next'. */
81 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
/* Number of entries of 'tbs' currently in use. */
82 static int nb_tbs;
83 /* any access to the tbs or the page table must use this lock */
84 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
86 #if defined(__arm__) || defined(__sparc_v9__)
87 /* The prologue must be reachable with a direct jump. ARM and Sparc64
88 have limited branch ranges (possibly also PPC) so place it in a
89 section close to code segment. */
90 #define code_gen_section \
91 __attribute__((__section__(".gen_code"))) \
92 __attribute__((aligned (32)))
93 #elif defined(_WIN32)
94 /* Maximum alignment for Win32 is 16. */
95 #define code_gen_section \
96 __attribute__((aligned (16)))
97 #else
98 #define code_gen_section \
99 __attribute__((aligned (32)))
100 #endif
102 uint8_t code_gen_prologue[1024] code_gen_section;
103 static uint8_t *code_gen_buffer;
104 static unsigned long code_gen_buffer_size;
105 /* threshold to flush the translated code buffer */
106 static unsigned long code_gen_buffer_max_size;
107 static uint8_t *code_gen_ptr;
109 #if !defined(CONFIG_USER_ONLY)
110 int phys_ram_fd;
111 static int in_migration;
113 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
115 static MemoryRegion *system_memory;
116 static MemoryRegion *system_io;
118 #endif
120 CPUState *first_cpu;
121 /* current CPU in the current thread. It is only valid inside
122 cpu_exec() */
123 CPUState *cpu_single_env;
124 /* 0 = Do not count executed instructions.
125 1 = Precise instruction counting.
126 2 = Adaptive rate instruction counting. */
127 int use_icount = 0;
/* Per-page record of the translated code that intersects a target page. */
129 typedef struct PageDesc {
130 /* list of TBs intersecting this ram page */
/* Each link in this list carries a tag in its two low bits (the index of
   the page slot inside the TB); users mask it off with "& ~3" before
   dereferencing. */
131 TranslationBlock *first_tb;
132 /* in order to optimize self modifying code, we count the number
133 of lookups we do to a given page to use a bitmap */
/* Incremented by tb_invalidate_phys_page_range() while no bitmap exists;
   reset to 0 by invalidate_page_bitmap(). */
134 unsigned int code_write_count;
/* One bit per byte of the page (TARGET_PAGE_SIZE / 8 bytes), set where
   translated code lives; NULL until build_page_bitmap() creates it. */
135 uint8_t *code_bitmap;
136 #if defined(CONFIG_USER_ONLY)
137 unsigned long flags;
138 #endif
139 } PageDesc;
141 /* In system mode we want L1_MAP to be based on ram offsets,
142 while in user mode we want it to be based on virtual addresses. */
143 #if !defined(CONFIG_USER_ONLY)
144 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
145 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
146 #else
147 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
148 #endif
149 #else
150 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
151 #endif
153 /* Size of the L2 (and L3, etc) page tables. */
154 #define L2_BITS 10
155 #define L2_SIZE (1 << L2_BITS)
157 /* The bits remaining after N lower levels of page tables. */
158 #define P_L1_BITS_REM \
159 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
160 #define V_L1_BITS_REM \
161 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
163 /* Size of the L1 page table. Avoid silly small sizes. */
164 #if P_L1_BITS_REM < 4
165 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
166 #else
167 #define P_L1_BITS P_L1_BITS_REM
168 #endif
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
177 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
179 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
180 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
182 unsigned long qemu_real_host_page_size;
183 unsigned long qemu_host_page_size;
184 unsigned long qemu_host_page_mask;
186 /* This is a multi-level map on the virtual address space.
187 The bottom level has pointers to PageDesc. */
188 static void *l1_map[V_L1_SIZE];
190 #if !defined(CONFIG_USER_ONLY)
/* Leaf entry of the physical page map (l1_phys_map).  New leaves are
   created by phys_page_find_alloc() with phys_offset set to
   IO_MEM_UNASSIGNED. */
191 typedef struct PhysPageDesc {
192 /* offset in host memory of the page + io_index in the low bits */
193 ram_addr_t phys_offset;
/* Seeded by phys_page_find_alloc() from the page index shifted by
   TARGET_PAGE_BITS. */
194 ram_addr_t region_offset;
195 } PhysPageDesc;
197 /* This is a multi-level map on the physical address space.
198 The bottom level has pointers to PhysPageDesc. */
199 static void *l1_phys_map[P_L1_SIZE];
201 static void io_mem_init(void);
202 static void memory_map_init(void);
204 /* io memory support */
205 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
206 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
207 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
208 static char io_mem_used[IO_MEM_NB_ENTRIES];
209 static int io_mem_watch;
210 #endif
212 /* log support */
213 #ifdef WIN32
214 static const char *logfilename = "qemu.log";
215 #else
216 static const char *logfilename = "/tmp/qemu.log";
217 #endif
218 FILE *logfile;
219 int loglevel;
220 static int log_append = 0;
222 /* statistics */
223 #if !defined(CONFIG_USER_ONLY)
224 static int tlb_flush_count;
225 #endif
226 static int tb_flush_count;
227 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Make the memory range [addr, addr + size) readable, writable and
   executable.  The result of VirtualProtect() is not checked. */
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous_protection);
}
#else
/* Make every host page touched by [addr, addr + size) readable, writable
   and executable.  The range is widened to host page boundaries first;
   the result of mprotect() is not checked. */
static void map_exec(void *addr, long size)
{
    unsigned long host_page = getpagesize();
    unsigned long low_bits = host_page - 1;
    unsigned long first = (unsigned long)addr & ~low_bits;
    unsigned long last = ((unsigned long)addr + size + low_bits) & ~low_bits;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
255 static void page_init(void)
257 /* NOTE: we can always suppose that qemu_host_page_size >=
258 TARGET_PAGE_SIZE */
259 #ifdef _WIN32
261 SYSTEM_INFO system_info;
263 GetSystemInfo(&system_info);
264 qemu_real_host_page_size = system_info.dwPageSize;
266 #else
267 qemu_real_host_page_size = getpagesize();
268 #endif
269 if (qemu_host_page_size == 0)
270 qemu_host_page_size = qemu_real_host_page_size;
271 if (qemu_host_page_size < TARGET_PAGE_SIZE)
272 qemu_host_page_size = TARGET_PAGE_SIZE;
273 qemu_host_page_mask = ~(qemu_host_page_size - 1);
275 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
277 #ifdef HAVE_KINFO_GETVMMAP
278 struct kinfo_vmentry *freep;
279 int i, cnt;
281 freep = kinfo_getvmmap(getpid(), &cnt);
282 if (freep) {
283 mmap_lock();
284 for (i = 0; i < cnt; i++) {
285 unsigned long startaddr, endaddr;
287 startaddr = freep[i].kve_start;
288 endaddr = freep[i].kve_end;
289 if (h2g_valid(startaddr)) {
290 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
292 if (h2g_valid(endaddr)) {
293 endaddr = h2g(endaddr);
294 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
295 } else {
296 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
297 endaddr = ~0ul;
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 #endif
303 free(freep);
304 mmap_unlock();
306 #else
307 FILE *f;
309 last_brk = (unsigned long)sbrk(0);
311 f = fopen("/compat/linux/proc/self/maps", "r");
312 if (f) {
313 mmap_lock();
315 do {
316 unsigned long startaddr, endaddr;
317 int n;
319 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
321 if (n == 2 && h2g_valid(startaddr)) {
322 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
324 if (h2g_valid(endaddr)) {
325 endaddr = h2g(endaddr);
326 } else {
327 endaddr = ~0ul;
329 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
331 } while (!feof(f));
333 fclose(f);
334 mmap_unlock();
336 #endif
338 #endif
341 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
343 PageDesc *pd;
344 void **lp;
345 int i;
347 #if defined(CONFIG_USER_ONLY)
348 /* We can't use g_malloc because it may recurse into a locked mutex. */
349 # define ALLOC(P, SIZE) \
350 do { \
351 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
352 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
353 } while (0)
354 #else
355 # define ALLOC(P, SIZE) \
356 do { P = g_malloc0(SIZE); } while (0)
357 #endif
359 /* Level 1. Always allocated. */
360 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
362 /* Level 2..N-1. */
363 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
364 void **p = *lp;
366 if (p == NULL) {
367 if (!alloc) {
368 return NULL;
370 ALLOC(p, sizeof(void *) * L2_SIZE);
371 *lp = p;
374 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
377 pd = *lp;
378 if (pd == NULL) {
379 if (!alloc) {
380 return NULL;
382 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
383 *lp = pd;
386 #undef ALLOC
388 return pd + (index & (L2_SIZE - 1));
391 static inline PageDesc *page_find(tb_page_addr_t index)
393 return page_find_alloc(index, 0);
396 #if !defined(CONFIG_USER_ONLY)
397 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
399 PhysPageDesc *pd;
400 void **lp;
401 int i;
403 /* Level 1. Always allocated. */
404 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
406 /* Level 2..N-1. */
407 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
408 void **p = *lp;
409 if (p == NULL) {
410 if (!alloc) {
411 return NULL;
413 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
415 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
418 pd = *lp;
419 if (pd == NULL) {
420 int i;
422 if (!alloc) {
423 return NULL;
426 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
428 for (i = 0; i < L2_SIZE; i++) {
429 pd[i].phys_offset = IO_MEM_UNASSIGNED;
430 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
434 return pd + (index & (L2_SIZE - 1));
437 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
439 return phys_page_find_alloc(index, 0);
442 static void tlb_protect_code(ram_addr_t ram_addr);
443 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
444 target_ulong vaddr);
445 #define mmap_lock() do { } while(0)
446 #define mmap_unlock() do { } while(0)
447 #endif
449 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
451 #if defined(CONFIG_USER_ONLY)
452 /* Currently it is not recommended to allocate big chunks of data in
453 user mode. It will change when a dedicated libc will be used */
454 #define USE_STATIC_CODE_GEN_BUFFER
455 #endif
457 #ifdef USE_STATIC_CODE_GEN_BUFFER
458 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
459 __attribute__((aligned (CODE_GEN_ALIGN)));
460 #endif
/* Allocate the buffer that receives generated host code and the array of
   TranslationBlock descriptors.

   tb_size is the requested buffer size in bytes.  Zero selects a default:
   DEFAULT_CODE_GEN_BUFFER_SIZE in user mode, ram_size / 4 otherwise.  The
   value is ignored when USE_STATIC_CODE_GEN_BUFFER is defined.

   On hosts where the buffer is obtained with mmap(), a failed mapping
   prints a message and terminates the process with exit(1). */
462 static void code_gen_alloc(unsigned long tb_size)
464 #ifdef USE_STATIC_CODE_GEN_BUFFER
465 code_gen_buffer = static_code_gen_buffer;
466 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
467 map_exec(code_gen_buffer, code_gen_buffer_size);
468 #else
469 code_gen_buffer_size = tb_size;
470 if (code_gen_buffer_size == 0) {
471 #if defined(CONFIG_USER_ONLY)
472 /* in user mode, phys_ram_size is not meaningful */
473 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
474 #else
475 /* XXX: needs adjustments */
476 code_gen_buffer_size = (unsigned long)(ram_size / 4);
477 #endif
/* Enforce the lower bound regardless of where the size came from. */
479 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
480 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
481 /* The code gen buffer location may have constraints depending on
482 the host cpu and OS */
483 #if defined(__linux__)
485 int flags;
486 void *start = NULL;
488 flags = MAP_PRIVATE | MAP_ANONYMOUS;
/* Each host branch below caps the size and/or picks a placement hint
   before the single mmap() call that follows the #endif. */
489 #if defined(__x86_64__)
490 flags |= MAP_32BIT;
491 /* Cannot map more than that */
492 if (code_gen_buffer_size > (800 * 1024 * 1024))
493 code_gen_buffer_size = (800 * 1024 * 1024);
494 #elif defined(__sparc_v9__)
495 // Map the buffer below 2G, so we can use direct calls and branches
496 flags |= MAP_FIXED;
497 start = (void *) 0x60000000UL;
498 if (code_gen_buffer_size > (512 * 1024 * 1024))
499 code_gen_buffer_size = (512 * 1024 * 1024);
500 #elif defined(__arm__)
501 /* Map the buffer below 32M, so we can use direct calls and branches */
502 flags |= MAP_FIXED;
503 start = (void *) 0x01000000UL;
504 if (code_gen_buffer_size > 16 * 1024 * 1024)
505 code_gen_buffer_size = 16 * 1024 * 1024;
506 #elif defined(__s390x__)
507 /* Map the buffer so that we can use direct calls and branches. */
508 /* We have a +- 4GB range on the branches; leave some slop. */
509 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
510 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
/* Here 'start' is only a hint: MAP_FIXED is not added on this branch. */
512 start = (void *)0x90000000UL;
513 #endif
514 code_gen_buffer = mmap(start, code_gen_buffer_size,
515 PROT_WRITE | PROT_READ | PROT_EXEC,
516 flags, -1, 0);
517 if (code_gen_buffer == MAP_FAILED) {
518 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
519 exit(1);
522 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
523 || defined(__DragonFly__) || defined(__OpenBSD__) \
524 || defined(__NetBSD__)
526 int flags;
527 void *addr = NULL;
528 flags = MAP_PRIVATE | MAP_ANONYMOUS;
529 #if defined(__x86_64__)
530 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
531 * 0x40000000 is free */
532 flags |= MAP_FIXED;
533 addr = (void *)0x40000000;
534 /* Cannot map more than that */
535 if (code_gen_buffer_size > (800 * 1024 * 1024))
536 code_gen_buffer_size = (800 * 1024 * 1024);
537 #elif defined(__sparc_v9__)
538 // Map the buffer below 2G, so we can use direct calls and branches
539 flags |= MAP_FIXED;
540 addr = (void *) 0x60000000UL;
541 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
542 code_gen_buffer_size = (512 * 1024 * 1024);
544 #endif
545 code_gen_buffer = mmap(addr, code_gen_buffer_size,
546 PROT_WRITE | PROT_READ | PROT_EXEC,
547 flags, -1, 0);
548 if (code_gen_buffer == MAP_FAILED) {
549 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
550 exit(1);
553 #else
/* Other hosts: take the buffer from the heap and make it executable. */
554 code_gen_buffer = g_malloc(code_gen_buffer_size);
555 map_exec(code_gen_buffer, code_gen_buffer_size);
556 #endif
557 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
558 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
/* Flush threshold: the buffer size minus TCG_MAX_OP_SIZE * OPC_BUF_SIZE
   bytes of headroom. */
559 code_gen_buffer_max_size = code_gen_buffer_size -
560 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
561 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
562 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
565 /* Must be called before using the QEMU cpus. 'tb_size' is the size
566 (in bytes) allocated to the translation buffer. Zero means default
567 size. */
568 void tcg_exec_init(unsigned long tb_size)
570 cpu_gen_init();
571 code_gen_alloc(tb_size);
572 code_gen_ptr = code_gen_buffer;
573 page_init();
574 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
575 /* There's no guest base to take into account, so go ahead and
576 initialize the prologue now. */
577 tcg_prologue_init(&tcg_ctx);
578 #endif
581 bool tcg_enabled(void)
583 return code_gen_buffer != NULL;
/* Global initialisation that does not depend on a particular CPU.  In
   system-mode builds this sets up the memory map and then the I/O memory
   tables; in user-mode builds it does nothing. */
void cpu_exec_init_all(void)
{
#ifndef CONFIG_USER_ONLY
    memory_map_init();
    io_mem_init();
#endif
}
594 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
596 static int cpu_common_post_load(void *opaque, int version_id)
598 CPUState *env = opaque;
600 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
601 version_id is increased. */
602 env->interrupt_request &= ~0x01;
603 tlb_flush(env, 1);
605 return 0;
608 static const VMStateDescription vmstate_cpu_common = {
609 .name = "cpu_common",
610 .version_id = 1,
611 .minimum_version_id = 1,
612 .minimum_version_id_old = 1,
613 .post_load = cpu_common_post_load,
614 .fields = (VMStateField []) {
615 VMSTATE_UINT32(halted, CPUState),
616 VMSTATE_UINT32(interrupt_request, CPUState),
617 VMSTATE_END_OF_LIST()
620 #endif
622 CPUState *qemu_get_cpu(int cpu)
624 CPUState *env = first_cpu;
626 while (env) {
627 if (env->cpu_index == cpu)
628 break;
629 env = env->next_cpu;
632 return env;
635 void cpu_exec_init(CPUState *env)
637 CPUState **penv;
638 int cpu_index;
640 #if defined(CONFIG_USER_ONLY)
641 cpu_list_lock();
642 #endif
643 env->next_cpu = NULL;
644 penv = &first_cpu;
645 cpu_index = 0;
646 while (*penv != NULL) {
647 penv = &(*penv)->next_cpu;
648 cpu_index++;
650 env->cpu_index = cpu_index;
651 env->numa_node = 0;
652 QTAILQ_INIT(&env->breakpoints);
653 QTAILQ_INIT(&env->watchpoints);
654 #ifndef CONFIG_USER_ONLY
655 env->thread_id = qemu_get_thread_id();
656 #endif
657 *penv = env;
658 #if defined(CONFIG_USER_ONLY)
659 cpu_list_unlock();
660 #endif
661 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
662 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
663 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
664 cpu_save, cpu_load, env);
665 #endif
668 /* Allocate a new translation block. Flush the translation buffer if
669 too many translation blocks or too much generated code. */
670 static TranslationBlock *tb_alloc(target_ulong pc)
672 TranslationBlock *tb;
674 if (nb_tbs >= code_gen_max_blocks ||
675 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
676 return NULL;
677 tb = &tbs[nb_tbs++];
678 tb->pc = pc;
679 tb->cflags = 0;
680 return tb;
683 void tb_free(TranslationBlock *tb)
685 /* In practice this is mostly used for single use temporary TB
686 Ignore the hard cases and just back up if this TB happens to
687 be the last one generated. */
688 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
689 code_gen_ptr = tb->tc_ptr;
690 nb_tbs--;
694 static inline void invalidate_page_bitmap(PageDesc *p)
696 if (p->code_bitmap) {
697 g_free(p->code_bitmap);
698 p->code_bitmap = NULL;
700 p->code_write_count = 0;
703 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
705 static void page_flush_tb_1 (int level, void **lp)
707 int i;
709 if (*lp == NULL) {
710 return;
712 if (level == 0) {
713 PageDesc *pd = *lp;
714 for (i = 0; i < L2_SIZE; ++i) {
715 pd[i].first_tb = NULL;
716 invalidate_page_bitmap(pd + i);
718 } else {
719 void **pp = *lp;
720 for (i = 0; i < L2_SIZE; ++i) {
721 page_flush_tb_1 (level - 1, pp + i);
726 static void page_flush_tb(void)
728 int i;
729 for (i = 0; i < V_L1_SIZE; i++) {
730 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
734 /* flush all the translation blocks */
735 /* XXX: tb_flush is currently not thread safe */
736 void tb_flush(CPUState *env1)
738 CPUState *env;
739 #if defined(DEBUG_FLUSH)
740 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
741 (unsigned long)(code_gen_ptr - code_gen_buffer),
742 nb_tbs, nb_tbs > 0 ?
743 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
744 #endif
745 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
746 cpu_abort(env1, "Internal error: code buffer overflow\n");
748 nb_tbs = 0;
750 for(env = first_cpu; env != NULL; env = env->next_cpu) {
751 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
754 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
755 page_flush_tb();
757 code_gen_ptr = code_gen_buffer;
758 /* XXX: flush processor icache at this point if cache flush is
759 expensive */
760 tb_flush_count++;
763 #ifdef DEBUG_TB_CHECK
765 static void tb_invalidate_check(target_ulong address)
767 TranslationBlock *tb;
768 int i;
769 address &= TARGET_PAGE_MASK;
770 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
771 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
772 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
773 address >= tb->pc + tb->size)) {
774 printf("ERROR invalidate: address=" TARGET_FMT_lx
775 " PC=%08lx size=%04x\n",
776 address, (long)tb->pc, tb->size);
782 /* verify that all the pages have correct rights for code */
783 static void tb_page_check(void)
785 TranslationBlock *tb;
786 int i, flags1, flags2;
788 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
789 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
790 flags1 = page_get_flags(tb->pc);
791 flags2 = page_get_flags(tb->pc + tb->size - 1);
792 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
793 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
794 (long)tb->pc, tb->size, flags1, flags2);
800 #endif
802 /* invalidate one TB */
803 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
804 int next_offset)
806 TranslationBlock *tb1;
807 for(;;) {
808 tb1 = *ptb;
809 if (tb1 == tb) {
810 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
811 break;
813 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
817 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
819 TranslationBlock *tb1;
820 unsigned int n1;
822 for(;;) {
823 tb1 = *ptb;
824 n1 = (long)tb1 & 3;
825 tb1 = (TranslationBlock *)((long)tb1 & ~3);
826 if (tb1 == tb) {
827 *ptb = tb1->page_next[n1];
828 break;
830 ptb = &tb1->page_next[n1];
834 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
836 TranslationBlock *tb1, **ptb;
837 unsigned int n1;
839 ptb = &tb->jmp_next[n];
840 tb1 = *ptb;
841 if (tb1) {
842 /* find tb(n) in circular list */
843 for(;;) {
844 tb1 = *ptb;
845 n1 = (long)tb1 & 3;
846 tb1 = (TranslationBlock *)((long)tb1 & ~3);
847 if (n1 == n && tb1 == tb)
848 break;
849 if (n1 == 2) {
850 ptb = &tb1->jmp_first;
851 } else {
852 ptb = &tb1->jmp_next[n1];
855 /* now we can suppress tb(n) from the list */
856 *ptb = tb->jmp_next[n];
858 tb->jmp_next[n] = NULL;
862 /* reset the jump entry 'n' of a TB so that it is not chained to
863 another TB */
864 static inline void tb_reset_jump(TranslationBlock *tb, int n)
866 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
869 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
871 CPUState *env;
872 PageDesc *p;
873 unsigned int h, n1;
874 tb_page_addr_t phys_pc;
875 TranslationBlock *tb1, *tb2;
877 /* remove the TB from the hash list */
878 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
879 h = tb_phys_hash_func(phys_pc);
880 tb_remove(&tb_phys_hash[h], tb,
881 offsetof(TranslationBlock, phys_hash_next));
883 /* remove the TB from the page list */
884 if (tb->page_addr[0] != page_addr) {
885 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
886 tb_page_remove(&p->first_tb, tb);
887 invalidate_page_bitmap(p);
889 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
890 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
891 tb_page_remove(&p->first_tb, tb);
892 invalidate_page_bitmap(p);
895 tb_invalidated_flag = 1;
897 /* remove the TB from the hash list */
898 h = tb_jmp_cache_hash_func(tb->pc);
899 for(env = first_cpu; env != NULL; env = env->next_cpu) {
900 if (env->tb_jmp_cache[h] == tb)
901 env->tb_jmp_cache[h] = NULL;
904 /* suppress this TB from the two jump lists */
905 tb_jmp_remove(tb, 0);
906 tb_jmp_remove(tb, 1);
908 /* suppress any remaining jumps to this TB */
909 tb1 = tb->jmp_first;
910 for(;;) {
911 n1 = (long)tb1 & 3;
912 if (n1 == 2)
913 break;
914 tb1 = (TranslationBlock *)((long)tb1 & ~3);
915 tb2 = tb1->jmp_next[n1];
916 tb_reset_jump(tb1, n1);
917 tb1->jmp_next[n1] = NULL;
918 tb1 = tb2;
920 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
922 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab'.  Bit i lives in byte
   i / 8 at position i % 8, least significant bit first.  A 'len' of zero
   changes nothing. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *byte = tab + (start >> 3);
    int pos;

    if ((start & ~7) == (end & ~7)) {
        /* The whole range sits inside one byte. */
        if (start < end) {
            *byte |= (0xff << (start & 7)) & ~(0xff << (end & 7));
        }
        return;
    }

    /* Leading partial byte: from 'start' up to the byte boundary. */
    *byte++ |= 0xff << (start & 7);

    /* Bytes that are covered completely. */
    for (pos = (start + 8) & ~7; pos < (end & ~7); pos += 8) {
        *byte++ = 0xff;
    }

    /* Trailing partial byte, if the range does not end on a boundary. */
    if (pos < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
952 static void build_page_bitmap(PageDesc *p)
954 int n, tb_start, tb_end;
955 TranslationBlock *tb;
957 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
959 tb = p->first_tb;
960 while (tb != NULL) {
961 n = (long)tb & 3;
962 tb = (TranslationBlock *)((long)tb & ~3);
963 /* NOTE: this is subtle as a TB may span two physical pages */
964 if (n == 0) {
965 /* NOTE: tb_end may be after the end of the page, but
966 it is not a problem */
967 tb_start = tb->pc & ~TARGET_PAGE_MASK;
968 tb_end = tb_start + tb->size;
969 if (tb_end > TARGET_PAGE_SIZE)
970 tb_end = TARGET_PAGE_SIZE;
971 } else {
972 tb_start = 0;
973 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
975 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
976 tb = tb->page_next[n];
980 TranslationBlock *tb_gen_code(CPUState *env,
981 target_ulong pc, target_ulong cs_base,
982 int flags, int cflags)
984 TranslationBlock *tb;
985 uint8_t *tc_ptr;
986 tb_page_addr_t phys_pc, phys_page2;
987 target_ulong virt_page2;
988 int code_gen_size;
990 phys_pc = get_page_addr_code(env, pc);
991 tb = tb_alloc(pc);
992 if (!tb) {
993 /* flush must be done */
994 tb_flush(env);
995 /* cannot fail at this point */
996 tb = tb_alloc(pc);
997 /* Don't forget to invalidate previous TB info. */
998 tb_invalidated_flag = 1;
1000 tc_ptr = code_gen_ptr;
1001 tb->tc_ptr = tc_ptr;
1002 tb->cs_base = cs_base;
1003 tb->flags = flags;
1004 tb->cflags = cflags;
1005 cpu_gen_code(env, tb, &code_gen_size);
1006 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1008 /* check next page if needed */
1009 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1010 phys_page2 = -1;
1011 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1012 phys_page2 = get_page_addr_code(env, virt_page2);
1014 tb_link_page(tb, phys_pc, phys_page2);
1015 return tb;
1018 /* invalidate all TBs which intersect with the target physical page
1019 starting in range [start;end[. NOTE: start and end must refer to
1020 the same physical page. 'is_cpu_write_access' should be true if called
1021 from a real cpu write access: the virtual CPU will exit the current
1022 TB if code is modified inside this TB. */
1023 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1024 int is_cpu_write_access)
1026 TranslationBlock *tb, *tb_next, *saved_tb;
1027 CPUState *env = cpu_single_env;
1028 tb_page_addr_t tb_start, tb_end;
1029 PageDesc *p;
1030 int n;
1031 #ifdef TARGET_HAS_PRECISE_SMC
1032 int current_tb_not_found = is_cpu_write_access;
1033 TranslationBlock *current_tb = NULL;
1034 int current_tb_modified = 0;
1035 target_ulong current_pc = 0;
1036 target_ulong current_cs_base = 0;
1037 int current_flags = 0;
1038 #endif /* TARGET_HAS_PRECISE_SMC */
1040 p = page_find(start >> TARGET_PAGE_BITS);
1041 if (!p)
1042 return;
1043 if (!p->code_bitmap &&
1044 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1045 is_cpu_write_access) {
1046 /* build code bitmap */
1047 build_page_bitmap(p);
1050 /* we remove all the TBs in the range [start, end[ */
1051 /* XXX: see if in some cases it could be faster to invalidate all the code */
1052 tb = p->first_tb;
1053 while (tb != NULL) {
1054 n = (long)tb & 3;
1055 tb = (TranslationBlock *)((long)tb & ~3);
1056 tb_next = tb->page_next[n];
1057 /* NOTE: this is subtle as a TB may span two physical pages */
1058 if (n == 0) {
1059 /* NOTE: tb_end may be after the end of the page, but
1060 it is not a problem */
1061 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1062 tb_end = tb_start + tb->size;
1063 } else {
1064 tb_start = tb->page_addr[1];
1065 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1067 if (!(tb_end <= start || tb_start >= end)) {
1068 #ifdef TARGET_HAS_PRECISE_SMC
1069 if (current_tb_not_found) {
1070 current_tb_not_found = 0;
1071 current_tb = NULL;
1072 if (env->mem_io_pc) {
1073 /* now we have a real cpu fault */
1074 current_tb = tb_find_pc(env->mem_io_pc);
1077 if (current_tb == tb &&
1078 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1079 /* If we are modifying the current TB, we must stop
1080 its execution. We could be more precise by checking
1081 that the modification is after the current PC, but it
1082 would require a specialized function to partially
1083 restore the CPU state */
1085 current_tb_modified = 1;
1086 cpu_restore_state(current_tb, env, env->mem_io_pc);
1087 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1088 &current_flags);
1090 #endif /* TARGET_HAS_PRECISE_SMC */
1091 /* we need to do that to handle the case where a signal
1092 occurs while doing tb_phys_invalidate() */
1093 saved_tb = NULL;
1094 if (env) {
1095 saved_tb = env->current_tb;
1096 env->current_tb = NULL;
1098 tb_phys_invalidate(tb, -1);
1099 if (env) {
1100 env->current_tb = saved_tb;
1101 if (env->interrupt_request && env->current_tb)
1102 cpu_interrupt(env, env->interrupt_request);
1105 tb = tb_next;
1107 #if !defined(CONFIG_USER_ONLY)
1108 /* if no code remaining, no need to continue to use slow writes */
1109 if (!p->first_tb) {
1110 invalidate_page_bitmap(p);
1111 if (is_cpu_write_access) {
1112 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1115 #endif
1116 #ifdef TARGET_HAS_PRECISE_SMC
1117 if (current_tb_modified) {
1118 /* we generate a block containing just the instruction
1119 modifying the memory. It will ensure that it cannot modify
1120 itself */
1121 env->current_tb = NULL;
1122 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1123 cpu_resume_from_signal(env, NULL);
1125 #endif
1128 /* len must be <= 8 and start must be a multiple of len */
1129 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1131 PageDesc *p;
1132 int offset, b;
1133 #if 0
1134 if (1) {
1135 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1136 cpu_single_env->mem_io_vaddr, len,
1137 cpu_single_env->eip,
1138 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1140 #endif
1141 p = page_find(start >> TARGET_PAGE_BITS);
1142 if (!p)
1143 return;
1144 if (p->code_bitmap) {
1145 offset = start & ~TARGET_PAGE_MASK;
1146 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1147 if (b & ((1 << len) - 1))
1148 goto do_invalidate;
1149 } else {
1150 do_invalidate:
1151 tb_invalidate_phys_page_range(start, start + len, 1);
1155 #if !defined(CONFIG_SOFTMMU)
1156 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1157 unsigned long pc, void *puc)
1159 TranslationBlock *tb;
1160 PageDesc *p;
1161 int n;
1162 #ifdef TARGET_HAS_PRECISE_SMC
1163 TranslationBlock *current_tb = NULL;
1164 CPUState *env = cpu_single_env;
1165 int current_tb_modified = 0;
1166 target_ulong current_pc = 0;
1167 target_ulong current_cs_base = 0;
1168 int current_flags = 0;
1169 #endif
1171 addr &= TARGET_PAGE_MASK;
1172 p = page_find(addr >> TARGET_PAGE_BITS);
1173 if (!p)
1174 return;
1175 tb = p->first_tb;
1176 #ifdef TARGET_HAS_PRECISE_SMC
1177 if (tb && pc != 0) {
1178 current_tb = tb_find_pc(pc);
1180 #endif
1181 while (tb != NULL) {
1182 n = (long)tb & 3;
1183 tb = (TranslationBlock *)((long)tb & ~3);
1184 #ifdef TARGET_HAS_PRECISE_SMC
1185 if (current_tb == tb &&
1186 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1187 /* If we are modifying the current TB, we must stop
1188 its execution. We could be more precise by checking
1189 that the modification is after the current PC, but it
1190 would require a specialized function to partially
1191 restore the CPU state */
1193 current_tb_modified = 1;
1194 cpu_restore_state(current_tb, env, pc);
1195 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1196 &current_flags);
1198 #endif /* TARGET_HAS_PRECISE_SMC */
1199 tb_phys_invalidate(tb, addr);
1200 tb = tb->page_next[n];
1202 p->first_tb = NULL;
1203 #ifdef TARGET_HAS_PRECISE_SMC
1204 if (current_tb_modified) {
1205 /* we generate a block containing just the instruction
1206 modifying the memory. It will ensure that it cannot modify
1207 itself */
1208 env->current_tb = NULL;
1209 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1210 cpu_resume_from_signal(env, puc);
1212 #endif
1214 #endif
1216 /* add the tb in the target page and protect it if necessary */
1217 static inline void tb_alloc_page(TranslationBlock *tb,
1218 unsigned int n, tb_page_addr_t page_addr)
1220 PageDesc *p;
1221 #ifndef CONFIG_USER_ONLY
1222 bool page_already_protected;
1223 #endif
1225 tb->page_addr[n] = page_addr;
1226 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1227 tb->page_next[n] = p->first_tb;
1228 #ifndef CONFIG_USER_ONLY
1229 page_already_protected = p->first_tb != NULL;
1230 #endif
1231 p->first_tb = (TranslationBlock *)((long)tb | n);
1232 invalidate_page_bitmap(p);
1234 #if defined(TARGET_HAS_SMC) || 1
1236 #if defined(CONFIG_USER_ONLY)
1237 if (p->flags & PAGE_WRITE) {
1238 target_ulong addr;
1239 PageDesc *p2;
1240 int prot;
1242 /* force the host page as non writable (writes will have a
1243 page fault + mprotect overhead) */
1244 page_addr &= qemu_host_page_mask;
1245 prot = 0;
1246 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1247 addr += TARGET_PAGE_SIZE) {
1249 p2 = page_find (addr >> TARGET_PAGE_BITS);
1250 if (!p2)
1251 continue;
1252 prot |= p2->flags;
1253 p2->flags &= ~PAGE_WRITE;
1255 mprotect(g2h(page_addr), qemu_host_page_size,
1256 (prot & PAGE_BITS) & ~PAGE_WRITE);
1257 #ifdef DEBUG_TB_INVALIDATE
1258 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1259 page_addr);
1260 #endif
1262 #else
1263 /* if some code is already present, then the pages are already
1264 protected. So we handle the case where only the first TB is
1265 allocated in a physical page */
1266 if (!page_already_protected) {
1267 tlb_protect_code(page_addr);
1269 #endif
1271 #endif /* TARGET_HAS_SMC */
1274 /* add a new TB and link it to the physical page tables. phys_page2 is
1275 (-1) to indicate that only one page contains the TB. */
1276 void tb_link_page(TranslationBlock *tb,
1277 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1279 unsigned int h;
1280 TranslationBlock **ptb;
1282 /* Grab the mmap lock to stop another thread invalidating this TB
1283 before we are done. */
1284 mmap_lock();
1285 /* add in the physical hash table */
1286 h = tb_phys_hash_func(phys_pc);
1287 ptb = &tb_phys_hash[h];
1288 tb->phys_hash_next = *ptb;
1289 *ptb = tb;
1291 /* add in the page list */
1292 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1293 if (phys_page2 != -1)
1294 tb_alloc_page(tb, 1, phys_page2);
1295 else
1296 tb->page_addr[1] = -1;
1298 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1299 tb->jmp_next[0] = NULL;
1300 tb->jmp_next[1] = NULL;
1302 /* init original jump addresses */
1303 if (tb->tb_next_offset[0] != 0xffff)
1304 tb_reset_jump(tb, 0);
1305 if (tb->tb_next_offset[1] != 0xffff)
1306 tb_reset_jump(tb, 1);
1308 #ifdef DEBUG_TB_CHECK
1309 tb_page_check();
1310 #endif
1311 mmap_unlock();
1314 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1315 tb[1].tc_ptr. Return NULL if not found */
1316 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1318 int m_min, m_max, m;
1319 unsigned long v;
1320 TranslationBlock *tb;
1322 if (nb_tbs <= 0)
1323 return NULL;
1324 if (tc_ptr < (unsigned long)code_gen_buffer ||
1325 tc_ptr >= (unsigned long)code_gen_ptr)
1326 return NULL;
1327 /* binary search (cf Knuth) */
1328 m_min = 0;
1329 m_max = nb_tbs - 1;
1330 while (m_min <= m_max) {
1331 m = (m_min + m_max) >> 1;
1332 tb = &tbs[m];
1333 v = (unsigned long)tb->tc_ptr;
1334 if (v == tc_ptr)
1335 return tb;
1336 else if (tc_ptr < v) {
1337 m_max = m - 1;
1338 } else {
1339 m_min = m + 1;
1342 return &tbs[m_max];
1345 static void tb_reset_jump_recursive(TranslationBlock *tb);
1347 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1349 TranslationBlock *tb1, *tb_next, **ptb;
1350 unsigned int n1;
1352 tb1 = tb->jmp_next[n];
1353 if (tb1 != NULL) {
1354 /* find head of list */
1355 for(;;) {
1356 n1 = (long)tb1 & 3;
1357 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1358 if (n1 == 2)
1359 break;
1360 tb1 = tb1->jmp_next[n1];
1362 /* we are now sure now that tb jumps to tb1 */
1363 tb_next = tb1;
1365 /* remove tb from the jmp_first list */
1366 ptb = &tb_next->jmp_first;
1367 for(;;) {
1368 tb1 = *ptb;
1369 n1 = (long)tb1 & 3;
1370 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1371 if (n1 == n && tb1 == tb)
1372 break;
1373 ptb = &tb1->jmp_next[n1];
1375 *ptb = tb->jmp_next[n];
1376 tb->jmp_next[n] = NULL;
1378 /* suppress the jump to next tb in generated code */
1379 tb_reset_jump(tb, n);
1381 /* suppress jumps in the tb on which we could have jumped */
1382 tb_reset_jump_recursive(tb_next);
1386 static void tb_reset_jump_recursive(TranslationBlock *tb)
1388 tb_reset_jump_recursive2(tb, 0);
1389 tb_reset_jump_recursive2(tb, 1);
1392 #if defined(TARGET_HAS_ICE)
1393 #if defined(CONFIG_USER_ONLY)
1394 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1396 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1398 #else
1399 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1401 target_phys_addr_t addr;
1402 target_ulong pd;
1403 ram_addr_t ram_addr;
1404 PhysPageDesc *p;
1406 addr = cpu_get_phys_page_debug(env, pc);
1407 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1408 if (!p) {
1409 pd = IO_MEM_UNASSIGNED;
1410 } else {
1411 pd = p->phys_offset;
1413 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1414 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1416 #endif
1417 #endif /* TARGET_HAS_ICE */
1419 #if defined(CONFIG_USER_ONLY)
1420 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1425 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1426 int flags, CPUWatchpoint **watchpoint)
1428 return -ENOSYS;
1430 #else
1431 /* Add a watchpoint. */
1432 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1433 int flags, CPUWatchpoint **watchpoint)
1435 target_ulong len_mask = ~(len - 1);
1436 CPUWatchpoint *wp;
1438 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1439 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1440 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1441 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1442 return -EINVAL;
1444 wp = g_malloc(sizeof(*wp));
1446 wp->vaddr = addr;
1447 wp->len_mask = len_mask;
1448 wp->flags = flags;
1450 /* keep all GDB-injected watchpoints in front */
1451 if (flags & BP_GDB)
1452 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1453 else
1454 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1456 tlb_flush_page(env, addr);
1458 if (watchpoint)
1459 *watchpoint = wp;
1460 return 0;
1463 /* Remove a specific watchpoint. */
1464 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1465 int flags)
1467 target_ulong len_mask = ~(len - 1);
1468 CPUWatchpoint *wp;
1470 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1471 if (addr == wp->vaddr && len_mask == wp->len_mask
1472 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1473 cpu_watchpoint_remove_by_ref(env, wp);
1474 return 0;
1477 return -ENOENT;
1480 /* Remove a specific watchpoint by reference. */
1481 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1483 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1485 tlb_flush_page(env, watchpoint->vaddr);
1487 g_free(watchpoint);
1490 /* Remove all matching watchpoints. */
1491 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1493 CPUWatchpoint *wp, *next;
1495 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1496 if (wp->flags & mask)
1497 cpu_watchpoint_remove_by_ref(env, wp);
1500 #endif
1502 /* Add a breakpoint. */
1503 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1504 CPUBreakpoint **breakpoint)
1506 #if defined(TARGET_HAS_ICE)
1507 CPUBreakpoint *bp;
1509 bp = g_malloc(sizeof(*bp));
1511 bp->pc = pc;
1512 bp->flags = flags;
1514 /* keep all GDB-injected breakpoints in front */
1515 if (flags & BP_GDB)
1516 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1517 else
1518 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1520 breakpoint_invalidate(env, pc);
1522 if (breakpoint)
1523 *breakpoint = bp;
1524 return 0;
1525 #else
1526 return -ENOSYS;
1527 #endif
1530 /* Remove a specific breakpoint. */
1531 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1533 #if defined(TARGET_HAS_ICE)
1534 CPUBreakpoint *bp;
1536 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1537 if (bp->pc == pc && bp->flags == flags) {
1538 cpu_breakpoint_remove_by_ref(env, bp);
1539 return 0;
1542 return -ENOENT;
1543 #else
1544 return -ENOSYS;
1545 #endif
1548 /* Remove a specific breakpoint by reference. */
1549 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1551 #if defined(TARGET_HAS_ICE)
1552 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1554 breakpoint_invalidate(env, breakpoint->pc);
1556 g_free(breakpoint);
1557 #endif
1560 /* Remove all matching breakpoints. */
1561 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1563 #if defined(TARGET_HAS_ICE)
1564 CPUBreakpoint *bp, *next;
1566 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1567 if (bp->flags & mask)
1568 cpu_breakpoint_remove_by_ref(env, bp);
1570 #endif
1573 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1574 CPU loop after each instruction */
1575 void cpu_single_step(CPUState *env, int enabled)
1577 #if defined(TARGET_HAS_ICE)
1578 if (env->singlestep_enabled != enabled) {
1579 env->singlestep_enabled = enabled;
1580 if (kvm_enabled())
1581 kvm_update_guest_debug(env, 0);
1582 else {
1583 /* must flush all the translated code to avoid inconsistencies */
1584 /* XXX: only flush what is necessary */
1585 tb_flush(env);
1588 #endif
1591 /* enable or disable low levels log */
1592 void cpu_set_log(int log_flags)
1594 loglevel = log_flags;
1595 if (loglevel && !logfile) {
1596 logfile = fopen(logfilename, log_append ? "a" : "w");
1597 if (!logfile) {
1598 perror(logfilename);
1599 _exit(1);
1601 #if !defined(CONFIG_SOFTMMU)
1602 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1604 static char logfile_buf[4096];
1605 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1607 #elif !defined(_WIN32)
1608 /* Win32 doesn't support line-buffering and requires size >= 2 */
1609 setvbuf(logfile, NULL, _IOLBF, 0);
1610 #endif
1611 log_append = 1;
1613 if (!loglevel && logfile) {
1614 fclose(logfile);
1615 logfile = NULL;
1619 void cpu_set_log_filename(const char *filename)
1621 logfilename = strdup(filename);
1622 if (logfile) {
1623 fclose(logfile);
1624 logfile = NULL;
1626 cpu_set_log(loglevel);
1629 static void cpu_unlink_tb(CPUState *env)
1631 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1632 problem and hope the cpu will stop of its own accord. For userspace
1633 emulation this often isn't actually as bad as it sounds. Often
1634 signals are used primarily to interrupt blocking syscalls. */
1635 TranslationBlock *tb;
1636 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1638 spin_lock(&interrupt_lock);
1639 tb = env->current_tb;
1640 /* if the cpu is currently executing code, we must unlink it and
1641 all the potentially executing TB */
1642 if (tb) {
1643 env->current_tb = NULL;
1644 tb_reset_jump_recursive(tb);
1646 spin_unlock(&interrupt_lock);
1649 #ifndef CONFIG_USER_ONLY
1650 /* mask must never be zero, except for A20 change call */
1651 static void tcg_handle_interrupt(CPUState *env, int mask)
1653 int old_mask;
1655 old_mask = env->interrupt_request;
1656 env->interrupt_request |= mask;
1659 * If called from iothread context, wake the target cpu in
1660 * case its halted.
1662 if (!qemu_cpu_is_self(env)) {
1663 qemu_cpu_kick(env);
1664 return;
1667 if (use_icount) {
1668 env->icount_decr.u16.high = 0xffff;
1669 if (!can_do_io(env)
1670 && (mask & ~old_mask) != 0) {
1671 cpu_abort(env, "Raised interrupt while not in I/O function");
1673 } else {
1674 cpu_unlink_tb(env);
1678 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1680 #else /* CONFIG_USER_ONLY */
1682 void cpu_interrupt(CPUState *env, int mask)
1684 env->interrupt_request |= mask;
1685 cpu_unlink_tb(env);
1687 #endif /* CONFIG_USER_ONLY */
1689 void cpu_reset_interrupt(CPUState *env, int mask)
1691 env->interrupt_request &= ~mask;
1694 void cpu_exit(CPUState *env)
1696 env->exit_request = 1;
1697 cpu_unlink_tb(env);
1700 const CPULogItem cpu_log_items[] = {
1701 { CPU_LOG_TB_OUT_ASM, "out_asm",
1702 "show generated host assembly code for each compiled TB" },
1703 { CPU_LOG_TB_IN_ASM, "in_asm",
1704 "show target assembly code for each compiled TB" },
1705 { CPU_LOG_TB_OP, "op",
1706 "show micro ops for each compiled TB" },
1707 { CPU_LOG_TB_OP_OPT, "op_opt",
1708 "show micro ops "
1709 #ifdef TARGET_I386
1710 "before eflags optimization and "
1711 #endif
1712 "after liveness analysis" },
1713 { CPU_LOG_INT, "int",
1714 "show interrupts/exceptions in short format" },
1715 { CPU_LOG_EXEC, "exec",
1716 "show trace before each executed TB (lots of logs)" },
1717 { CPU_LOG_TB_CPU, "cpu",
1718 "show CPU state before block translation" },
1719 #ifdef TARGET_I386
1720 { CPU_LOG_PCALL, "pcall",
1721 "show protected mode far calls/returns/exceptions" },
1722 { CPU_LOG_RESET, "cpu_reset",
1723 "show CPU state before CPU resets" },
1724 #endif
1725 #ifdef DEBUG_IOPORT
1726 { CPU_LOG_IOPORT, "ioport",
1727 "show all i/o ports accesses" },
1728 #endif
1729 { 0, NULL, NULL },
1732 #ifndef CONFIG_USER_ONLY
1733 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1734 = QLIST_HEAD_INITIALIZER(memory_client_list);
1736 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1737 ram_addr_t size,
1738 ram_addr_t phys_offset,
1739 bool log_dirty)
1741 CPUPhysMemoryClient *client;
1742 QLIST_FOREACH(client, &memory_client_list, list) {
1743 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1747 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1748 target_phys_addr_t end)
1750 CPUPhysMemoryClient *client;
1751 QLIST_FOREACH(client, &memory_client_list, list) {
1752 int r = client->sync_dirty_bitmap(client, start, end);
1753 if (r < 0)
1754 return r;
1756 return 0;
1759 static int cpu_notify_migration_log(int enable)
1761 CPUPhysMemoryClient *client;
1762 QLIST_FOREACH(client, &memory_client_list, list) {
1763 int r = client->migration_log(client, enable);
1764 if (r < 0)
1765 return r;
1767 return 0;
1770 struct last_map {
1771 target_phys_addr_t start_addr;
1772 ram_addr_t size;
1773 ram_addr_t phys_offset;
1776 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1777 * address. Each intermediate table provides the next L2_BITs of guest
1778 * physical address space. The number of levels vary based on host and
1779 * guest configuration, making it efficient to build the final guest
1780 * physical address by seeding the L1 offset and shifting and adding in
1781 * each L2 offset as we recurse through them. */
1782 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1783 void **lp, target_phys_addr_t addr,
1784 struct last_map *map)
1786 int i;
1788 if (*lp == NULL) {
1789 return;
1791 if (level == 0) {
1792 PhysPageDesc *pd = *lp;
1793 addr <<= L2_BITS + TARGET_PAGE_BITS;
1794 for (i = 0; i < L2_SIZE; ++i) {
1795 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1796 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1798 if (map->size &&
1799 start_addr == map->start_addr + map->size &&
1800 pd[i].phys_offset == map->phys_offset + map->size) {
1802 map->size += TARGET_PAGE_SIZE;
1803 continue;
1804 } else if (map->size) {
1805 client->set_memory(client, map->start_addr,
1806 map->size, map->phys_offset, false);
1809 map->start_addr = start_addr;
1810 map->size = TARGET_PAGE_SIZE;
1811 map->phys_offset = pd[i].phys_offset;
1814 } else {
1815 void **pp = *lp;
1816 for (i = 0; i < L2_SIZE; ++i) {
1817 phys_page_for_each_1(client, level - 1, pp + i,
1818 (addr << L2_BITS) | i, map);
1823 static void phys_page_for_each(CPUPhysMemoryClient *client)
1825 int i;
1826 struct last_map map = { };
1828 for (i = 0; i < P_L1_SIZE; ++i) {
1829 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1830 l1_phys_map + i, i, &map);
1832 if (map.size) {
1833 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1834 false);
1838 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1840 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1841 phys_page_for_each(client);
1844 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1846 QLIST_REMOVE(client, list);
1848 #endif
/*
 * Compare the first 'n' bytes of s1 against the whole string s2.
 *
 * Returns 1 when s2 is exactly n bytes long and those bytes equal the
 * first n bytes of s1, 0 otherwise.  s1 does not need to be
 * NUL-terminated after n bytes.  A negative n never matches.
 */
static int cmp1(const char *s1, int n, const char *s2)
{
    /* explicit check instead of an implicit int -> size_t conversion */
    if (n < 0 || strlen(s2) != (size_t)n) {
        return 0;
    }
    return memcmp(s1, s2, (size_t)n) == 0;
}
1857 /* takes a comma separated list of log masks. Return 0 if error. */
1858 int cpu_str_to_log_mask(const char *str)
1860 const CPULogItem *item;
1861 int mask;
1862 const char *p, *p1;
1864 p = str;
1865 mask = 0;
1866 for(;;) {
1867 p1 = strchr(p, ',');
1868 if (!p1)
1869 p1 = p + strlen(p);
1870 if(cmp1(p,p1-p,"all")) {
1871 for(item = cpu_log_items; item->mask != 0; item++) {
1872 mask |= item->mask;
1874 } else {
1875 for(item = cpu_log_items; item->mask != 0; item++) {
1876 if (cmp1(p, p1 - p, item->name))
1877 goto found;
1879 return 0;
1881 found:
1882 mask |= item->mask;
1883 if (*p1 != ',')
1884 break;
1885 p = p1 + 1;
1887 return mask;
1890 void cpu_abort(CPUState *env, const char *fmt, ...)
1892 va_list ap;
1893 va_list ap2;
1895 va_start(ap, fmt);
1896 va_copy(ap2, ap);
1897 fprintf(stderr, "qemu: fatal: ");
1898 vfprintf(stderr, fmt, ap);
1899 fprintf(stderr, "\n");
1900 #ifdef TARGET_I386
1901 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1902 #else
1903 cpu_dump_state(env, stderr, fprintf, 0);
1904 #endif
1905 if (qemu_log_enabled()) {
1906 qemu_log("qemu: fatal: ");
1907 qemu_log_vprintf(fmt, ap2);
1908 qemu_log("\n");
1909 #ifdef TARGET_I386
1910 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1911 #else
1912 log_cpu_state(env, 0);
1913 #endif
1914 qemu_log_flush();
1915 qemu_log_close();
1917 va_end(ap2);
1918 va_end(ap);
1919 #if defined(CONFIG_USER_ONLY)
1921 struct sigaction act;
1922 sigfillset(&act.sa_mask);
1923 act.sa_handler = SIG_DFL;
1924 sigaction(SIGABRT, &act, NULL);
1926 #endif
1927 abort();
1930 CPUState *cpu_copy(CPUState *env)
1932 CPUState *new_env = cpu_init(env->cpu_model_str);
1933 CPUState *next_cpu = new_env->next_cpu;
1934 int cpu_index = new_env->cpu_index;
1935 #if defined(TARGET_HAS_ICE)
1936 CPUBreakpoint *bp;
1937 CPUWatchpoint *wp;
1938 #endif
1940 memcpy(new_env, env, sizeof(CPUState));
1942 /* Preserve chaining and index. */
1943 new_env->next_cpu = next_cpu;
1944 new_env->cpu_index = cpu_index;
1946 /* Clone all break/watchpoints.
1947 Note: Once we support ptrace with hw-debug register access, make sure
1948 BP_CPU break/watchpoints are handled correctly on clone. */
1949 QTAILQ_INIT(&env->breakpoints);
1950 QTAILQ_INIT(&env->watchpoints);
1951 #if defined(TARGET_HAS_ICE)
1952 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1953 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1955 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1956 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1957 wp->flags, NULL);
1959 #endif
1961 return new_env;
1964 #if !defined(CONFIG_USER_ONLY)
1966 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1968 unsigned int i;
1970 /* Discard jump cache entries for any tb which might potentially
1971 overlap the flushed page. */
1972 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1973 memset (&env->tb_jmp_cache[i], 0,
1974 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1976 i = tb_jmp_cache_hash_page(addr);
1977 memset (&env->tb_jmp_cache[i], 0,
1978 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1981 static CPUTLBEntry s_cputlb_empty_entry = {
1982 .addr_read = -1,
1983 .addr_write = -1,
1984 .addr_code = -1,
1985 .addend = -1,
1988 /* NOTE: if flush_global is true, also flush global entries (not
1989 implemented yet) */
1990 void tlb_flush(CPUState *env, int flush_global)
1992 int i;
1994 #if defined(DEBUG_TLB)
1995 printf("tlb_flush:\n");
1996 #endif
1997 /* must reset current TB so that interrupts cannot modify the
1998 links while we are modifying them */
1999 env->current_tb = NULL;
2001 for(i = 0; i < CPU_TLB_SIZE; i++) {
2002 int mmu_idx;
2003 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2004 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2008 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2010 env->tlb_flush_addr = -1;
2011 env->tlb_flush_mask = 0;
2012 tlb_flush_count++;
2015 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2017 if (addr == (tlb_entry->addr_read &
2018 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2019 addr == (tlb_entry->addr_write &
2020 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2021 addr == (tlb_entry->addr_code &
2022 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2023 *tlb_entry = s_cputlb_empty_entry;
2027 void tlb_flush_page(CPUState *env, target_ulong addr)
2029 int i;
2030 int mmu_idx;
2032 #if defined(DEBUG_TLB)
2033 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2034 #endif
2035 /* Check if we need to flush due to large pages. */
2036 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2037 #if defined(DEBUG_TLB)
2038 printf("tlb_flush_page: forced full flush ("
2039 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2040 env->tlb_flush_addr, env->tlb_flush_mask);
2041 #endif
2042 tlb_flush(env, 1);
2043 return;
2045 /* must reset current TB so that interrupts cannot modify the
2046 links while we are modifying them */
2047 env->current_tb = NULL;
2049 addr &= TARGET_PAGE_MASK;
2050 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2051 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2052 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2054 tlb_flush_jmp_cache(env, addr);
2057 /* update the TLBs so that writes to code in the virtual page 'addr'
2058 can be detected */
2059 static void tlb_protect_code(ram_addr_t ram_addr)
2061 cpu_physical_memory_reset_dirty(ram_addr,
2062 ram_addr + TARGET_PAGE_SIZE,
2063 CODE_DIRTY_FLAG);
2066 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2067 tested for self modifying code */
2068 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2069 target_ulong vaddr)
2071 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2074 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2075 unsigned long start, unsigned long length)
2077 unsigned long addr;
2078 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2079 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2080 if ((addr - start) < length) {
2081 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2086 /* Note: start and end must be within the same ram block. */
2087 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2088 int dirty_flags)
2090 CPUState *env;
2091 unsigned long length, start1;
2092 int i;
2094 start &= TARGET_PAGE_MASK;
2095 end = TARGET_PAGE_ALIGN(end);
2097 length = end - start;
2098 if (length == 0)
2099 return;
2100 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2102 /* we modify the TLB cache so that the dirty bit will be set again
2103 when accessing the range */
2104 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2105 /* Check that we don't span multiple blocks - this breaks the
2106 address comparisons below. */
2107 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2108 != (end - 1) - start) {
2109 abort();
2112 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2113 int mmu_idx;
2114 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2115 for(i = 0; i < CPU_TLB_SIZE; i++)
2116 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2117 start1, length);
2122 int cpu_physical_memory_set_dirty_tracking(int enable)
2124 int ret = 0;
2125 in_migration = enable;
2126 ret = cpu_notify_migration_log(!!enable);
2127 return ret;
2130 int cpu_physical_memory_get_dirty_tracking(void)
2132 return in_migration;
2135 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2136 target_phys_addr_t end_addr)
2138 int ret;
2140 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2141 return ret;
2144 int cpu_physical_log_start(target_phys_addr_t start_addr,
2145 ram_addr_t size)
2147 CPUPhysMemoryClient *client;
2148 QLIST_FOREACH(client, &memory_client_list, list) {
2149 if (client->log_start) {
2150 int r = client->log_start(client, start_addr, size);
2151 if (r < 0) {
2152 return r;
2156 return 0;
2159 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2160 ram_addr_t size)
2162 CPUPhysMemoryClient *client;
2163 QLIST_FOREACH(client, &memory_client_list, list) {
2164 if (client->log_stop) {
2165 int r = client->log_stop(client, start_addr, size);
2166 if (r < 0) {
2167 return r;
2171 return 0;
2174 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2176 ram_addr_t ram_addr;
2177 void *p;
2179 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2180 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2181 + tlb_entry->addend);
2182 ram_addr = qemu_ram_addr_from_host_nofail(p);
2183 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2184 tlb_entry->addr_write |= TLB_NOTDIRTY;
2189 /* update the TLB according to the current state of the dirty bits */
2190 void cpu_tlb_update_dirty(CPUState *env)
2192 int i;
2193 int mmu_idx;
2194 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2195 for(i = 0; i < CPU_TLB_SIZE; i++)
2196 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2200 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2202 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2203 tlb_entry->addr_write = vaddr;
2206 /* update the TLB corresponding to virtual page vaddr
2207 so that it is no longer dirty */
2208 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2210 int i;
2211 int mmu_idx;
2213 vaddr &= TARGET_PAGE_MASK;
2214 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2215 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2216 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2219 /* Our TLB does not support large pages, so remember the area covered by
2220 large pages and trigger a full TLB flush if these are invalidated. */
2221 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2222 target_ulong size)
2224 target_ulong mask = ~(size - 1);
2226 if (env->tlb_flush_addr == (target_ulong)-1) {
2227 env->tlb_flush_addr = vaddr & mask;
2228 env->tlb_flush_mask = mask;
2229 return;
2231 /* Extend the existing region to include the new page.
2232 This is a compromise between unnecessary flushes and the cost
2233 of maintaining a full variable size TLB. */
2234 mask &= env->tlb_flush_mask;
2235 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2236 mask <<= 1;
2238 env->tlb_flush_addr &= mask;
2239 env->tlb_flush_mask = mask;
2242 /* Add a new TLB entry. At most one entry for a given virtual address
2243 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2244 supplied size is only used by tlb_flush_page. */
2245 void tlb_set_page(CPUState *env, target_ulong vaddr,
2246 target_phys_addr_t paddr, int prot,
2247 int mmu_idx, target_ulong size)
2249 PhysPageDesc *p;
2250 unsigned long pd;
2251 unsigned int index;
2252 target_ulong address;
2253 target_ulong code_address;
2254 unsigned long addend;
2255 CPUTLBEntry *te;
2256 CPUWatchpoint *wp;
2257 target_phys_addr_t iotlb;
2259 assert(size >= TARGET_PAGE_SIZE);
2260 if (size != TARGET_PAGE_SIZE) {
2261 tlb_add_large_page(env, vaddr, size);
2263 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2264 if (!p) {
2265 pd = IO_MEM_UNASSIGNED;
2266 } else {
2267 pd = p->phys_offset;
2269 #if defined(DEBUG_TLB)
2270 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2271 " prot=%x idx=%d pd=0x%08lx\n",
2272 vaddr, paddr, prot, mmu_idx, pd);
2273 #endif
2275 address = vaddr;
2276 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2277 /* IO memory case (romd handled later) */
2278 address |= TLB_MMIO;
2280 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2281 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2282 /* Normal RAM. */
2283 iotlb = pd & TARGET_PAGE_MASK;
2284 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2285 iotlb |= IO_MEM_NOTDIRTY;
2286 else
2287 iotlb |= IO_MEM_ROM;
2288 } else {
2289 /* IO handlers are currently passed a physical address.
2290 It would be nice to pass an offset from the base address
2291 of that region. This would avoid having to special case RAM,
2292 and avoid full address decoding in every device.
2293 We can't use the high bits of pd for this because
2294 IO_MEM_ROMD uses these as a ram address. */
2295 iotlb = (pd & ~TARGET_PAGE_MASK);
2296 if (p) {
2297 iotlb += p->region_offset;
2298 } else {
2299 iotlb += paddr;
2303 code_address = address;
2304 /* Make accesses to pages with watchpoints go via the
2305 watchpoint trap routines. */
2306 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2307 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2308 /* Avoid trapping reads of pages with a write breakpoint. */
2309 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2310 iotlb = io_mem_watch + paddr;
2311 address |= TLB_MMIO;
2312 break;
2317 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2318 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2319 te = &env->tlb_table[mmu_idx][index];
2320 te->addend = addend - vaddr;
2321 if (prot & PAGE_READ) {
2322 te->addr_read = address;
2323 } else {
2324 te->addr_read = -1;
2327 if (prot & PAGE_EXEC) {
2328 te->addr_code = code_address;
2329 } else {
2330 te->addr_code = -1;
2332 if (prot & PAGE_WRITE) {
2333 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2334 (pd & IO_MEM_ROMD)) {
2335 /* Write access calls the I/O callback. */
2336 te->addr_write = address | TLB_MMIO;
2337 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2338 !cpu_physical_memory_is_dirty(pd)) {
2339 te->addr_write = address | TLB_NOTDIRTY;
2340 } else {
2341 te->addr_write = address;
2343 } else {
2344 te->addr_write = -1;
2348 #else
2350 void tlb_flush(CPUState *env, int flush_global)
2354 void tlb_flush_page(CPUState *env, target_ulong addr)
2359 * Walks guest process memory "regions" one by one
2360 * and calls callback function 'fn' for each region.
2363 struct walk_memory_regions_data
2365 walk_memory_regions_fn fn;
2366 void *priv;
2367 unsigned long start;
2368 int prot;
2371 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2372 abi_ulong end, int new_prot)
2374 if (data->start != -1ul) {
2375 int rc = data->fn(data->priv, data->start, end, data->prot);
2376 if (rc != 0) {
2377 return rc;
2381 data->start = (new_prot ? end : -1ul);
2382 data->prot = new_prot;
2384 return 0;
2387 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2388 abi_ulong base, int level, void **lp)
2390 abi_ulong pa;
2391 int i, rc;
2393 if (*lp == NULL) {
2394 return walk_memory_regions_end(data, base, 0);
2397 if (level == 0) {
2398 PageDesc *pd = *lp;
2399 for (i = 0; i < L2_SIZE; ++i) {
2400 int prot = pd[i].flags;
2402 pa = base | (i << TARGET_PAGE_BITS);
2403 if (prot != data->prot) {
2404 rc = walk_memory_regions_end(data, pa, prot);
2405 if (rc != 0) {
2406 return rc;
2410 } else {
2411 void **pp = *lp;
2412 for (i = 0; i < L2_SIZE; ++i) {
2413 pa = base | ((abi_ulong)i <<
2414 (TARGET_PAGE_BITS + L2_BITS * level));
2415 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2416 if (rc != 0) {
2417 return rc;
2422 return 0;
2425 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2427 struct walk_memory_regions_data data;
2428 unsigned long i;
2430 data.fn = fn;
2431 data.priv = priv;
2432 data.start = -1ul;
2433 data.prot = 0;
2435 for (i = 0; i < V_L1_SIZE; i++) {
2436 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2437 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2438 if (rc != 0) {
2439 return rc;
2443 return walk_memory_regions_end(&data, 0, 0);
2446 static int dump_region(void *priv, abi_ulong start,
2447 abi_ulong end, unsigned long prot)
2449 FILE *f = (FILE *)priv;
2451 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2452 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2453 start, end, end - start,
2454 ((prot & PAGE_READ) ? 'r' : '-'),
2455 ((prot & PAGE_WRITE) ? 'w' : '-'),
2456 ((prot & PAGE_EXEC) ? 'x' : '-'));
2458 return (0);
2461 /* dump memory mappings */
2462 void page_dump(FILE *f)
2464 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2465 "start", "end", "size", "prot");
2466 walk_memory_regions(f, dump_region);
2469 int page_get_flags(target_ulong address)
2471 PageDesc *p;
2473 p = page_find(address >> TARGET_PAGE_BITS);
2474 if (!p)
2475 return 0;
2476 return p->flags;
2479 /* Modify the flags of a page and invalidate the code if necessary.
2480 The flag PAGE_WRITE_ORG is positioned automatically depending
2481 on PAGE_WRITE. The mmap_lock should already be held. */
2482 void page_set_flags(target_ulong start, target_ulong end, int flags)
2484 target_ulong addr, len;
2486 /* This function should never be called with addresses outside the
2487 guest address space. If this assert fires, it probably indicates
2488 a missing call to h2g_valid. */
2489 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2490 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2491 #endif
2492 assert(start < end);
2494 start = start & TARGET_PAGE_MASK;
2495 end = TARGET_PAGE_ALIGN(end);
2497 if (flags & PAGE_WRITE) {
2498 flags |= PAGE_WRITE_ORG;
2501 for (addr = start, len = end - start;
2502 len != 0;
2503 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2504 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2506 /* If the write protection bit is set, then we invalidate
2507 the code inside. */
2508 if (!(p->flags & PAGE_WRITE) &&
2509 (flags & PAGE_WRITE) &&
2510 p->first_tb) {
2511 tb_invalidate_phys_page(addr, 0, NULL);
2513 p->flags = flags;
2517 int page_check_range(target_ulong start, target_ulong len, int flags)
2519 PageDesc *p;
2520 target_ulong end;
2521 target_ulong addr;
2523 /* This function should never be called with addresses outside the
2524 guest address space. If this assert fires, it probably indicates
2525 a missing call to h2g_valid. */
2526 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2527 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2528 #endif
2530 if (len == 0) {
2531 return 0;
2533 if (start + len - 1 < start) {
2534 /* We've wrapped around. */
2535 return -1;
2538 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2539 start = start & TARGET_PAGE_MASK;
2541 for (addr = start, len = end - start;
2542 len != 0;
2543 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2544 p = page_find(addr >> TARGET_PAGE_BITS);
2545 if( !p )
2546 return -1;
2547 if( !(p->flags & PAGE_VALID) )
2548 return -1;
2550 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2551 return -1;
2552 if (flags & PAGE_WRITE) {
2553 if (!(p->flags & PAGE_WRITE_ORG))
2554 return -1;
2555 /* unprotect the page if it was put read-only because it
2556 contains translated code */
2557 if (!(p->flags & PAGE_WRITE)) {
2558 if (!page_unprotect(addr, 0, NULL))
2559 return -1;
2561 return 0;
2564 return 0;
2567 /* called from signal handler: invalidate the code and unprotect the
2568 page. Return TRUE if the fault was successfully handled. */
2569 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2571 unsigned int prot;
2572 PageDesc *p;
2573 target_ulong host_start, host_end, addr;
2575 /* Technically this isn't safe inside a signal handler. However we
2576 know this only ever happens in a synchronous SEGV handler, so in
2577 practice it seems to be ok. */
2578 mmap_lock();
2580 p = page_find(address >> TARGET_PAGE_BITS);
2581 if (!p) {
2582 mmap_unlock();
2583 return 0;
2586 /* if the page was really writable, then we change its
2587 protection back to writable */
2588 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2589 host_start = address & qemu_host_page_mask;
2590 host_end = host_start + qemu_host_page_size;
2592 prot = 0;
2593 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2594 p = page_find(addr >> TARGET_PAGE_BITS);
2595 p->flags |= PAGE_WRITE;
2596 prot |= p->flags;
2598 /* and since the content will be modified, we must invalidate
2599 the corresponding translated code. */
2600 tb_invalidate_phys_page(addr, pc, puc);
2601 #ifdef DEBUG_TB_CHECK
2602 tb_invalidate_check(addr);
2603 #endif
2605 mprotect((void *)g2h(host_start), qemu_host_page_size,
2606 prot & PAGE_BITS);
2608 mmap_unlock();
2609 return 1;
2611 mmap_unlock();
2612 return 0;
2615 static inline void tlb_set_dirty(CPUState *env,
2616 unsigned long addr, target_ulong vaddr)
2619 #endif /* defined(CONFIG_USER_ONLY) */
2621 #if !defined(CONFIG_USER_ONLY)
2623 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2624 typedef struct subpage_t {
2625 target_phys_addr_t base;
2626 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2627 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2628 } subpage_t;
2630 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2631 ram_addr_t memory, ram_addr_t region_offset);
2632 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2633 ram_addr_t orig_memory,
2634 ram_addr_t region_offset);
/* Work out which byte range of the page at 'addr' is covered by the
   registration starting at 'start_addr', storing the first and last
   in-page offsets in 'start_addr2' / 'end_addr2'.  'need_subpage' is set
   to 1 when the range does not cover the whole page; it is never cleared.
   Note: the expansion reads a variable named 'orig_size' from the
   enclosing scope, and the 'end_addr' argument is not referenced. */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                     \
    do {                                                                \
        if (addr > start_addr) {                                        \
            start_addr2 = 0;                                            \
        } else {                                                        \
            start_addr2 = start_addr & ~TARGET_PAGE_MASK;               \
            if (start_addr2 > 0) {                                      \
                need_subpage = 1;                                       \
            }                                                           \
        }                                                               \
                                                                        \
        if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) {      \
            end_addr2 = TARGET_PAGE_SIZE - 1;                           \
        } else {                                                        \
            end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
            if (end_addr2 < TARGET_PAGE_SIZE - 1) {                     \
                need_subpage = 1;                                       \
            }                                                           \
        }                                                               \
    } while (0)
2655 /* register physical memory.
2656 For RAM, 'size' must be a multiple of the target page size.
2657 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2658 io memory page. The address used when calling the IO function is
2659 the offset from the start of the region, plus region_offset. Both
2660 start_addr and region_offset are rounded down to a page boundary
2661 before calculating this offset. This should not be a problem unless
2662 the low bits of start_addr and region_offset differ. */
2663 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2664 ram_addr_t size,
2665 ram_addr_t phys_offset,
2666 ram_addr_t region_offset,
2667 bool log_dirty)
2669 target_phys_addr_t addr, end_addr;
2670 PhysPageDesc *p;
2671 CPUState *env;
2672 ram_addr_t orig_size = size;
2673 subpage_t *subpage;
2675 assert(size);
2676 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2678 if (phys_offset == IO_MEM_UNASSIGNED) {
2679 region_offset = start_addr;
2681 region_offset &= TARGET_PAGE_MASK;
2682 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2683 end_addr = start_addr + (target_phys_addr_t)size;
2685 addr = start_addr;
2686 do {
2687 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2688 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2689 ram_addr_t orig_memory = p->phys_offset;
2690 target_phys_addr_t start_addr2, end_addr2;
2691 int need_subpage = 0;
2693 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2694 need_subpage);
2695 if (need_subpage) {
2696 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2697 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2698 &p->phys_offset, orig_memory,
2699 p->region_offset);
2700 } else {
2701 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2702 >> IO_MEM_SHIFT];
2704 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2705 region_offset);
2706 p->region_offset = 0;
2707 } else {
2708 p->phys_offset = phys_offset;
2709 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2710 (phys_offset & IO_MEM_ROMD))
2711 phys_offset += TARGET_PAGE_SIZE;
2713 } else {
2714 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2715 p->phys_offset = phys_offset;
2716 p->region_offset = region_offset;
2717 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2718 (phys_offset & IO_MEM_ROMD)) {
2719 phys_offset += TARGET_PAGE_SIZE;
2720 } else {
2721 target_phys_addr_t start_addr2, end_addr2;
2722 int need_subpage = 0;
2724 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2725 end_addr2, need_subpage);
2727 if (need_subpage) {
2728 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2729 &p->phys_offset, IO_MEM_UNASSIGNED,
2730 addr & TARGET_PAGE_MASK);
2731 subpage_register(subpage, start_addr2, end_addr2,
2732 phys_offset, region_offset);
2733 p->region_offset = 0;
2737 region_offset += TARGET_PAGE_SIZE;
2738 addr += TARGET_PAGE_SIZE;
2739 } while (addr != end_addr);
2741 /* since each CPU stores ram addresses in its TLB cache, we must
2742 reset the modified entries */
2743 /* XXX: slow ! */
2744 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2745 tlb_flush(env, 1);
2749 /* XXX: temporary until new memory mapping API */
2750 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2752 PhysPageDesc *p;
2754 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2755 if (!p)
2756 return IO_MEM_UNASSIGNED;
2757 return p->phys_offset;
2760 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2762 if (kvm_enabled())
2763 kvm_coalesce_mmio_region(addr, size);
2766 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2768 if (kvm_enabled())
2769 kvm_uncoalesce_mmio_region(addr, size);
2772 void qemu_flush_coalesced_mmio_buffer(void)
2774 if (kvm_enabled())
2775 kvm_flush_coalesced_mmio_buffer();
2778 #if defined(__linux__) && !defined(TARGET_S390X)
2780 #include <sys/vfs.h>
2782 #define HUGETLBFS_MAGIC 0x958458f6
2784 static long gethugepagesize(const char *path)
2786 struct statfs fs;
2787 int ret;
2789 do {
2790 ret = statfs(path, &fs);
2791 } while (ret != 0 && errno == EINTR);
2793 if (ret != 0) {
2794 perror(path);
2795 return 0;
2798 if (fs.f_type != HUGETLBFS_MAGIC)
2799 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2801 return fs.f_bsize;
2804 static void *file_ram_alloc(RAMBlock *block,
2805 ram_addr_t memory,
2806 const char *path)
2808 char *filename;
2809 void *area;
2810 int fd;
2811 #ifdef MAP_POPULATE
2812 int flags;
2813 #endif
2814 unsigned long hpagesize;
2816 hpagesize = gethugepagesize(path);
2817 if (!hpagesize) {
2818 return NULL;
2821 if (memory < hpagesize) {
2822 return NULL;
2825 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2826 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2827 return NULL;
2830 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2831 return NULL;
2834 fd = mkstemp(filename);
2835 if (fd < 0) {
2836 perror("unable to create backing store for hugepages");
2837 free(filename);
2838 return NULL;
2840 unlink(filename);
2841 free(filename);
2843 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2846 * ftruncate is not supported by hugetlbfs in older
2847 * hosts, so don't bother bailing out on errors.
2848 * If anything goes wrong with it under other filesystems,
2849 * mmap will fail.
2851 if (ftruncate(fd, memory))
2852 perror("ftruncate");
2854 #ifdef MAP_POPULATE
2855 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2856 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2857 * to sidestep this quirk.
2859 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2860 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2861 #else
2862 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2863 #endif
2864 if (area == MAP_FAILED) {
2865 perror("file_ram_alloc: can't mmap RAM pages");
2866 close(fd);
2867 return (NULL);
2869 block->fd = fd;
2870 return area;
2872 #endif
2874 static ram_addr_t find_ram_offset(ram_addr_t size)
2876 RAMBlock *block, *next_block;
2877 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2879 if (QLIST_EMPTY(&ram_list.blocks))
2880 return 0;
2882 QLIST_FOREACH(block, &ram_list.blocks, next) {
2883 ram_addr_t end, next = RAM_ADDR_MAX;
2885 end = block->offset + block->length;
2887 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2888 if (next_block->offset >= end) {
2889 next = MIN(next, next_block->offset);
2892 if (next - end >= size && next - end < mingap) {
2893 offset = end;
2894 mingap = next - end;
2897 return offset;
2900 static ram_addr_t last_ram_offset(void)
2902 RAMBlock *block;
2903 ram_addr_t last = 0;
2905 QLIST_FOREACH(block, &ram_list.blocks, next)
2906 last = MAX(last, block->offset + block->length);
2908 return last;
2911 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2912 ram_addr_t size, void *host)
2914 RAMBlock *new_block, *block;
2916 size = TARGET_PAGE_ALIGN(size);
2917 new_block = g_malloc0(sizeof(*new_block));
2919 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2920 char *id = dev->parent_bus->info->get_dev_path(dev);
2921 if (id) {
2922 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2923 g_free(id);
2926 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2928 QLIST_FOREACH(block, &ram_list.blocks, next) {
2929 if (!strcmp(block->idstr, new_block->idstr)) {
2930 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2931 new_block->idstr);
2932 abort();
2936 new_block->offset = find_ram_offset(size);
2937 if (host) {
2938 new_block->host = host;
2939 new_block->flags |= RAM_PREALLOC_MASK;
2940 } else {
2941 if (mem_path) {
2942 #if defined (__linux__) && !defined(TARGET_S390X)
2943 new_block->host = file_ram_alloc(new_block, size, mem_path);
2944 if (!new_block->host) {
2945 new_block->host = qemu_vmalloc(size);
2946 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2948 #else
2949 fprintf(stderr, "-mem-path option unsupported\n");
2950 exit(1);
2951 #endif
2952 } else {
2953 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2954 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2955 an system defined value, which is at least 256GB. Larger systems
2956 have larger values. We put the guest between the end of data
2957 segment (system break) and this value. We use 32GB as a base to
2958 have enough room for the system break to grow. */
2959 new_block->host = mmap((void*)0x800000000, size,
2960 PROT_EXEC|PROT_READ|PROT_WRITE,
2961 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2962 if (new_block->host == MAP_FAILED) {
2963 fprintf(stderr, "Allocating RAM failed\n");
2964 abort();
2966 #else
2967 if (xen_enabled()) {
2968 xen_ram_alloc(new_block->offset, size);
2969 } else {
2970 new_block->host = qemu_vmalloc(size);
2972 #endif
2973 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2976 new_block->length = size;
2978 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2980 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2981 last_ram_offset() >> TARGET_PAGE_BITS);
2982 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2983 0xff, size >> TARGET_PAGE_BITS);
2985 if (kvm_enabled())
2986 kvm_setup_guest_memory(new_block->host, size);
2988 return new_block->offset;
2991 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2993 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2996 void qemu_ram_free_from_ptr(ram_addr_t addr)
2998 RAMBlock *block;
3000 QLIST_FOREACH(block, &ram_list.blocks, next) {
3001 if (addr == block->offset) {
3002 QLIST_REMOVE(block, next);
3003 g_free(block);
3004 return;
3009 void qemu_ram_free(ram_addr_t addr)
3011 RAMBlock *block;
3013 QLIST_FOREACH(block, &ram_list.blocks, next) {
3014 if (addr == block->offset) {
3015 QLIST_REMOVE(block, next);
3016 if (block->flags & RAM_PREALLOC_MASK) {
3018 } else if (mem_path) {
3019 #if defined (__linux__) && !defined(TARGET_S390X)
3020 if (block->fd) {
3021 munmap(block->host, block->length);
3022 close(block->fd);
3023 } else {
3024 qemu_vfree(block->host);
3026 #else
3027 abort();
3028 #endif
3029 } else {
3030 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3031 munmap(block->host, block->length);
3032 #else
3033 if (xen_enabled()) {
3034 xen_invalidate_map_cache_entry(block->host);
3035 } else {
3036 qemu_vfree(block->host);
3038 #endif
3040 g_free(block);
3041 return;
3047 #ifndef _WIN32
3048 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3050 RAMBlock *block;
3051 ram_addr_t offset;
3052 int flags;
3053 void *area, *vaddr;
3055 QLIST_FOREACH(block, &ram_list.blocks, next) {
3056 offset = addr - block->offset;
3057 if (offset < block->length) {
3058 vaddr = block->host + offset;
3059 if (block->flags & RAM_PREALLOC_MASK) {
3061 } else {
3062 flags = MAP_FIXED;
3063 munmap(vaddr, length);
3064 if (mem_path) {
3065 #if defined(__linux__) && !defined(TARGET_S390X)
3066 if (block->fd) {
3067 #ifdef MAP_POPULATE
3068 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3069 MAP_PRIVATE;
3070 #else
3071 flags |= MAP_PRIVATE;
3072 #endif
3073 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3074 flags, block->fd, offset);
3075 } else {
3076 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3077 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3078 flags, -1, 0);
3080 #else
3081 abort();
3082 #endif
3083 } else {
3084 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3085 flags |= MAP_SHARED | MAP_ANONYMOUS;
3086 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3087 flags, -1, 0);
3088 #else
3089 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3090 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3091 flags, -1, 0);
3092 #endif
3094 if (area != vaddr) {
3095 fprintf(stderr, "Could not remap addr: "
3096 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3097 length, addr);
3098 exit(1);
3100 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3102 return;
3106 #endif /* !_WIN32 */
3108 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3109 With the exception of the softmmu code in this file, this should
3110 only be used for local memory (e.g. video ram) that the device owns,
3111 and knows it isn't going to access beyond the end of the block.
3113 It should not be used for general purpose DMA.
3114 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3116 void *qemu_get_ram_ptr(ram_addr_t addr)
3118 RAMBlock *block;
3120 QLIST_FOREACH(block, &ram_list.blocks, next) {
3121 if (addr - block->offset < block->length) {
3122 /* Move this entry to to start of the list. */
3123 if (block != QLIST_FIRST(&ram_list.blocks)) {
3124 QLIST_REMOVE(block, next);
3125 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3127 if (xen_enabled()) {
3128 /* We need to check if the requested address is in the RAM
3129 * because we don't want to map the entire memory in QEMU.
3130 * In that case just map until the end of the page.
3132 if (block->offset == 0) {
3133 return xen_map_cache(addr, 0, 0);
3134 } else if (block->host == NULL) {
3135 block->host =
3136 xen_map_cache(block->offset, block->length, 1);
3139 return block->host + (addr - block->offset);
3143 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3144 abort();
3146 return NULL;
3149 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3150 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3152 void *qemu_safe_ram_ptr(ram_addr_t addr)
3154 RAMBlock *block;
3156 QLIST_FOREACH(block, &ram_list.blocks, next) {
3157 if (addr - block->offset < block->length) {
3158 if (xen_enabled()) {
3159 /* We need to check if the requested address is in the RAM
3160 * because we don't want to map the entire memory in QEMU.
3161 * In that case just map until the end of the page.
3163 if (block->offset == 0) {
3164 return xen_map_cache(addr, 0, 0);
3165 } else if (block->host == NULL) {
3166 block->host =
3167 xen_map_cache(block->offset, block->length, 1);
3170 return block->host + (addr - block->offset);
3174 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3175 abort();
3177 return NULL;
3180 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3181 * but takes a size argument */
3182 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3184 if (*size == 0) {
3185 return NULL;
3187 if (xen_enabled()) {
3188 return xen_map_cache(addr, *size, 1);
3189 } else {
3190 RAMBlock *block;
3192 QLIST_FOREACH(block, &ram_list.blocks, next) {
3193 if (addr - block->offset < block->length) {
3194 if (addr - block->offset + *size > block->length)
3195 *size = block->length - addr + block->offset;
3196 return block->host + (addr - block->offset);
3200 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3201 abort();
/* Counterpart of the qemu_*_ram_ptr getters: only emits the
   qemu_put_ram_ptr trace event for 'addr'. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3210 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3212 RAMBlock *block;
3213 uint8_t *host = ptr;
3215 if (xen_enabled()) {
3216 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3217 return 0;
3220 QLIST_FOREACH(block, &ram_list.blocks, next) {
3221 /* This case append when the block is not mapped. */
3222 if (block->host == NULL) {
3223 continue;
3225 if (host - block->host < block->length) {
3226 *ram_addr = block->offset + (host - block->host);
3227 return 0;
3231 return -1;
3234 /* Some of the softmmu routines need to translate from a host pointer
3235 (typically a TLB entry) back to a ram offset. */
3236 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3238 ram_addr_t ram_addr;
3240 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3241 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3242 abort();
3244 return ram_addr;
3247 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3249 #ifdef DEBUG_UNASSIGNED
3250 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3251 #endif
3252 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3253 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3254 #endif
3255 return 0;
3258 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3260 #ifdef DEBUG_UNASSIGNED
3261 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3262 #endif
3263 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3264 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3265 #endif
3266 return 0;
3269 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3271 #ifdef DEBUG_UNASSIGNED
3272 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3273 #endif
3274 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3275 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3276 #endif
3277 return 0;
3280 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3282 #ifdef DEBUG_UNASSIGNED
3283 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3284 #endif
3285 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3286 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3287 #endif
3290 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3292 #ifdef DEBUG_UNASSIGNED
3293 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3294 #endif
3295 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3296 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3297 #endif
3300 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3302 #ifdef DEBUG_UNASSIGNED
3303 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3304 #endif
3305 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3306 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3307 #endif
3310 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3311 unassigned_mem_readb,
3312 unassigned_mem_readw,
3313 unassigned_mem_readl,
3316 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3317 unassigned_mem_writeb,
3318 unassigned_mem_writew,
3319 unassigned_mem_writel,
3322 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3323 uint32_t val)
3325 int dirty_flags;
3326 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3327 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3328 #if !defined(CONFIG_USER_ONLY)
3329 tb_invalidate_phys_page_fast(ram_addr, 1);
3330 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3331 #endif
3333 stb_p(qemu_get_ram_ptr(ram_addr), val);
3334 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3335 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3336 /* we remove the notdirty callback only if the code has been
3337 flushed */
3338 if (dirty_flags == 0xff)
3339 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3342 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3343 uint32_t val)
3345 int dirty_flags;
3346 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3347 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3348 #if !defined(CONFIG_USER_ONLY)
3349 tb_invalidate_phys_page_fast(ram_addr, 2);
3350 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3351 #endif
3353 stw_p(qemu_get_ram_ptr(ram_addr), val);
3354 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3355 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3356 /* we remove the notdirty callback only if the code has been
3357 flushed */
3358 if (dirty_flags == 0xff)
3359 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3362 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3363 uint32_t val)
3365 int dirty_flags;
3366 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3367 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3368 #if !defined(CONFIG_USER_ONLY)
3369 tb_invalidate_phys_page_fast(ram_addr, 4);
3370 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3371 #endif
3373 stl_p(qemu_get_ram_ptr(ram_addr), val);
3374 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3375 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3376 /* we remove the notdirty callback only if the code has been
3377 flushed */
3378 if (dirty_flags == 0xff)
3379 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3382 static CPUReadMemoryFunc * const error_mem_read[3] = {
3383 NULL, /* never used */
3384 NULL, /* never used */
3385 NULL, /* never used */
3388 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3389 notdirty_mem_writeb,
3390 notdirty_mem_writew,
3391 notdirty_mem_writel,
3394 /* Generate a debug exception if a watchpoint has been hit. */
3395 static void check_watchpoint(int offset, int len_mask, int flags)
3397 CPUState *env = cpu_single_env;
3398 target_ulong pc, cs_base;
3399 TranslationBlock *tb;
3400 target_ulong vaddr;
3401 CPUWatchpoint *wp;
3402 int cpu_flags;
3404 if (env->watchpoint_hit) {
3405 /* We re-entered the check after replacing the TB. Now raise
3406 * the debug interrupt so that is will trigger after the
3407 * current instruction. */
3408 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3409 return;
3411 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3412 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3413 if ((vaddr == (wp->vaddr & len_mask) ||
3414 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3415 wp->flags |= BP_WATCHPOINT_HIT;
3416 if (!env->watchpoint_hit) {
3417 env->watchpoint_hit = wp;
3418 tb = tb_find_pc(env->mem_io_pc);
3419 if (!tb) {
3420 cpu_abort(env, "check_watchpoint: could not find TB for "
3421 "pc=%p", (void *)env->mem_io_pc);
3423 cpu_restore_state(tb, env, env->mem_io_pc);
3424 tb_phys_invalidate(tb, -1);
3425 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3426 env->exception_index = EXCP_DEBUG;
3427 } else {
3428 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3429 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3431 cpu_resume_from_signal(env, NULL);
3433 } else {
3434 wp->flags &= ~BP_WATCHPOINT_HIT;
3439 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3440 so these check for a hit then pass through to the normal out-of-line
3441 phys routines. */
3442 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3444 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3445 return ldub_phys(addr);
3448 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3450 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3451 return lduw_phys(addr);
3454 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3456 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3457 return ldl_phys(addr);
3460 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3461 uint32_t val)
3463 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3464 stb_phys(addr, val);
3467 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3468 uint32_t val)
3470 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3471 stw_phys(addr, val);
3474 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3475 uint32_t val)
3477 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3478 stl_phys(addr, val);
3481 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3482 watch_mem_readb,
3483 watch_mem_readw,
3484 watch_mem_readl,
3487 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3488 watch_mem_writeb,
3489 watch_mem_writew,
3490 watch_mem_writel,
3493 static inline uint32_t subpage_readlen (subpage_t *mmio,
3494 target_phys_addr_t addr,
3495 unsigned int len)
3497 unsigned int idx = SUBPAGE_IDX(addr);
3498 #if defined(DEBUG_SUBPAGE)
3499 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3500 mmio, len, addr, idx);
3501 #endif
3503 addr += mmio->region_offset[idx];
3504 idx = mmio->sub_io_index[idx];
3505 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3508 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3509 uint32_t value, unsigned int len)
3511 unsigned int idx = SUBPAGE_IDX(addr);
3512 #if defined(DEBUG_SUBPAGE)
3513 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3514 __func__, mmio, len, addr, idx, value);
3515 #endif
3517 addr += mmio->region_offset[idx];
3518 idx = mmio->sub_io_index[idx];
3519 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3522 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3524 return subpage_readlen(opaque, addr, 0);
3527 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3528 uint32_t value)
3530 subpage_writelen(opaque, addr, value, 0);
3533 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3535 return subpage_readlen(opaque, addr, 1);
3538 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3539 uint32_t value)
3541 subpage_writelen(opaque, addr, value, 1);
3544 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3546 return subpage_readlen(opaque, addr, 2);
3549 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3550 uint32_t value)
3552 subpage_writelen(opaque, addr, value, 2);
3555 static CPUReadMemoryFunc * const subpage_read[] = {
3556 &subpage_readb,
3557 &subpage_readw,
3558 &subpage_readl,
3561 static CPUWriteMemoryFunc * const subpage_write[] = {
3562 &subpage_writeb,
3563 &subpage_writew,
3564 &subpage_writel,
3567 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3568 ram_addr_t memory, ram_addr_t region_offset)
3570 int idx, eidx;
3572 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3573 return -1;
3574 idx = SUBPAGE_IDX(start);
3575 eidx = SUBPAGE_IDX(end);
3576 #if defined(DEBUG_SUBPAGE)
3577 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3578 mmio, start, end, idx, eidx, memory);
3579 #endif
3580 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3581 memory = IO_MEM_UNASSIGNED;
3582 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3583 for (; idx <= eidx; idx++) {
3584 mmio->sub_io_index[idx] = memory;
3585 mmio->region_offset[idx] = region_offset;
3588 return 0;
3591 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3592 ram_addr_t orig_memory,
3593 ram_addr_t region_offset)
3595 subpage_t *mmio;
3596 int subpage_memory;
3598 mmio = g_malloc0(sizeof(subpage_t));
3600 mmio->base = base;
3601 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3602 DEVICE_NATIVE_ENDIAN);
3603 #if defined(DEBUG_SUBPAGE)
3604 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3605 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3606 #endif
3607 *phys = subpage_memory | IO_MEM_SUBPAGE;
3608 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3610 return mmio;
3613 static int get_free_io_mem_idx(void)
3615 int i;
3617 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3618 if (!io_mem_used[i]) {
3619 io_mem_used[i] = 1;
3620 return i;
3622 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3623 return -1;
/*
 * Usually, devices operate in little endian mode. There are devices out
 * there that operate in big endian too. Each device gets byte swapped
 * mmio if plugged onto a CPU that does the other endianness.
 *
 * CPU          Device           swap?
 *
 * little       little           no
 * little       big              yes
 * big          little           yes
 * big          big              no
 */
3639 typedef struct SwapEndianContainer {
3640 CPUReadMemoryFunc *read[3];
3641 CPUWriteMemoryFunc *write[3];
3642 void *opaque;
3643 } SwapEndianContainer;
3645 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3647 uint32_t val;
3648 SwapEndianContainer *c = opaque;
3649 val = c->read[0](c->opaque, addr);
3650 return val;
3653 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3655 uint32_t val;
3656 SwapEndianContainer *c = opaque;
3657 val = bswap16(c->read[1](c->opaque, addr));
3658 return val;
3661 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3663 uint32_t val;
3664 SwapEndianContainer *c = opaque;
3665 val = bswap32(c->read[2](c->opaque, addr));
3666 return val;
3669 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3670 swapendian_mem_readb,
3671 swapendian_mem_readw,
3672 swapendian_mem_readl
3675 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3676 uint32_t val)
3678 SwapEndianContainer *c = opaque;
3679 c->write[0](c->opaque, addr, val);
3682 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3683 uint32_t val)
3685 SwapEndianContainer *c = opaque;
3686 c->write[1](c->opaque, addr, bswap16(val));
3689 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3690 uint32_t val)
3692 SwapEndianContainer *c = opaque;
3693 c->write[2](c->opaque, addr, bswap32(val));
3696 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3697 swapendian_mem_writeb,
3698 swapendian_mem_writew,
3699 swapendian_mem_writel
3702 static void swapendian_init(int io_index)
3704 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3705 int i;
3707 /* Swap mmio for big endian targets */
3708 c->opaque = io_mem_opaque[io_index];
3709 for (i = 0; i < 3; i++) {
3710 c->read[i] = io_mem_read[io_index][i];
3711 c->write[i] = io_mem_write[io_index][i];
3713 io_mem_read[io_index][i] = swapendian_readfn[i];
3714 io_mem_write[io_index][i] = swapendian_writefn[i];
3716 io_mem_opaque[io_index] = c;
3719 static void swapendian_del(int io_index)
3721 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3722 g_free(io_mem_opaque[io_index]);
3726 /* mem_read and mem_write are arrays of functions containing the
3727 function to access byte (index 0), word (index 1) and dword (index
3728 2). Functions can be omitted with a NULL function pointer.
3729 If io_index is non zero, the corresponding io zone is
3730 modified. If it is zero, a new io zone is allocated. The return
3731 value can be used with cpu_register_physical_memory(). (-1) is
3732 returned if error. */
3733 static int cpu_register_io_memory_fixed(int io_index,
3734 CPUReadMemoryFunc * const *mem_read,
3735 CPUWriteMemoryFunc * const *mem_write,
3736 void *opaque, enum device_endian endian)
3738 int i;
3740 if (io_index <= 0) {
3741 io_index = get_free_io_mem_idx();
3742 if (io_index == -1)
3743 return io_index;
3744 } else {
3745 io_index >>= IO_MEM_SHIFT;
3746 if (io_index >= IO_MEM_NB_ENTRIES)
3747 return -1;
3750 for (i = 0; i < 3; ++i) {
3751 io_mem_read[io_index][i]
3752 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3754 for (i = 0; i < 3; ++i) {
3755 io_mem_write[io_index][i]
3756 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3758 io_mem_opaque[io_index] = opaque;
3760 switch (endian) {
3761 case DEVICE_BIG_ENDIAN:
3762 #ifndef TARGET_WORDS_BIGENDIAN
3763 swapendian_init(io_index);
3764 #endif
3765 break;
3766 case DEVICE_LITTLE_ENDIAN:
3767 #ifdef TARGET_WORDS_BIGENDIAN
3768 swapendian_init(io_index);
3769 #endif
3770 break;
3771 case DEVICE_NATIVE_ENDIAN:
3772 default:
3773 break;
3776 return (io_index << IO_MEM_SHIFT);
3779 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3780 CPUWriteMemoryFunc * const *mem_write,
3781 void *opaque, enum device_endian endian)
3783 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3786 void cpu_unregister_io_memory(int io_table_address)
3788 int i;
3789 int io_index = io_table_address >> IO_MEM_SHIFT;
3791 swapendian_del(io_index);
3793 for (i=0;i < 3; i++) {
3794 io_mem_read[io_index][i] = unassigned_mem_read[i];
3795 io_mem_write[io_index][i] = unassigned_mem_write[i];
3797 io_mem_opaque[io_index] = NULL;
3798 io_mem_used[io_index] = 0;
3801 static void io_mem_init(void)
3803 int i;
3805 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3806 unassigned_mem_write, NULL,
3807 DEVICE_NATIVE_ENDIAN);
3808 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3809 unassigned_mem_write, NULL,
3810 DEVICE_NATIVE_ENDIAN);
3811 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3812 notdirty_mem_write, NULL,
3813 DEVICE_NATIVE_ENDIAN);
3814 for (i=0; i<5; i++)
3815 io_mem_used[i] = 1;
3817 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3818 watch_mem_write, NULL,
3819 DEVICE_NATIVE_ENDIAN);
3822 static void memory_map_init(void)
3824 system_memory = g_malloc(sizeof(*system_memory));
3825 memory_region_init(system_memory, "system", INT64_MAX);
3826 set_system_memory_map(system_memory);
3828 system_io = g_malloc(sizeof(*system_io));
3829 memory_region_init(system_io, "io", 65536);
3830 set_system_io_map(system_io);
3833 MemoryRegion *get_system_memory(void)
3835 return system_memory;
3838 MemoryRegion *get_system_io(void)
3840 return system_io;
3843 #endif /* !defined(CONFIG_USER_ONLY) */
3845 /* physical memory access (slow version, mainly for debug) */
3846 #if defined(CONFIG_USER_ONLY)
3847 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3848 uint8_t *buf, int len, int is_write)
3850 int l, flags;
3851 target_ulong page;
3852 void * p;
3854 while (len > 0) {
3855 page = addr & TARGET_PAGE_MASK;
3856 l = (page + TARGET_PAGE_SIZE) - addr;
3857 if (l > len)
3858 l = len;
3859 flags = page_get_flags(page);
3860 if (!(flags & PAGE_VALID))
3861 return -1;
3862 if (is_write) {
3863 if (!(flags & PAGE_WRITE))
3864 return -1;
3865 /* XXX: this code should not depend on lock_user */
3866 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3867 return -1;
3868 memcpy(p, buf, l);
3869 unlock_user(p, addr, l);
3870 } else {
3871 if (!(flags & PAGE_READ))
3872 return -1;
3873 /* XXX: this code should not depend on lock_user */
3874 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3875 return -1;
3876 memcpy(buf, p, l);
3877 unlock_user(p, addr, 0);
3879 len -= l;
3880 buf += l;
3881 addr += l;
3883 return 0;
3886 #else
3887 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3888 int len, int is_write)
3890 int l, io_index;
3891 uint8_t *ptr;
3892 uint32_t val;
3893 target_phys_addr_t page;
3894 ram_addr_t pd;
3895 PhysPageDesc *p;
3897 while (len > 0) {
3898 page = addr & TARGET_PAGE_MASK;
3899 l = (page + TARGET_PAGE_SIZE) - addr;
3900 if (l > len)
3901 l = len;
3902 p = phys_page_find(page >> TARGET_PAGE_BITS);
3903 if (!p) {
3904 pd = IO_MEM_UNASSIGNED;
3905 } else {
3906 pd = p->phys_offset;
3909 if (is_write) {
3910 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3911 target_phys_addr_t addr1 = addr;
3912 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3913 if (p)
3914 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3915 /* XXX: could force cpu_single_env to NULL to avoid
3916 potential bugs */
3917 if (l >= 4 && ((addr1 & 3) == 0)) {
3918 /* 32 bit write access */
3919 val = ldl_p(buf);
3920 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3921 l = 4;
3922 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3923 /* 16 bit write access */
3924 val = lduw_p(buf);
3925 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3926 l = 2;
3927 } else {
3928 /* 8 bit write access */
3929 val = ldub_p(buf);
3930 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3931 l = 1;
3933 } else {
3934 ram_addr_t addr1;
3935 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3936 /* RAM case */
3937 ptr = qemu_get_ram_ptr(addr1);
3938 memcpy(ptr, buf, l);
3939 if (!cpu_physical_memory_is_dirty(addr1)) {
3940 /* invalidate code */
3941 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3942 /* set dirty bit */
3943 cpu_physical_memory_set_dirty_flags(
3944 addr1, (0xff & ~CODE_DIRTY_FLAG));
3946 qemu_put_ram_ptr(ptr);
3948 } else {
3949 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3950 !(pd & IO_MEM_ROMD)) {
3951 target_phys_addr_t addr1 = addr;
3952 /* I/O case */
3953 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3954 if (p)
3955 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3956 if (l >= 4 && ((addr1 & 3) == 0)) {
3957 /* 32 bit read access */
3958 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3959 stl_p(buf, val);
3960 l = 4;
3961 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3962 /* 16 bit read access */
3963 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3964 stw_p(buf, val);
3965 l = 2;
3966 } else {
3967 /* 8 bit read access */
3968 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3969 stb_p(buf, val);
3970 l = 1;
3972 } else {
3973 /* RAM case */
3974 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3975 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3976 qemu_put_ram_ptr(ptr);
3979 len -= l;
3980 buf += l;
3981 addr += l;
3985 /* used for ROM loading : can write in RAM and ROM */
3986 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3987 const uint8_t *buf, int len)
3989 int l;
3990 uint8_t *ptr;
3991 target_phys_addr_t page;
3992 unsigned long pd;
3993 PhysPageDesc *p;
3995 while (len > 0) {
3996 page = addr & TARGET_PAGE_MASK;
3997 l = (page + TARGET_PAGE_SIZE) - addr;
3998 if (l > len)
3999 l = len;
4000 p = phys_page_find(page >> TARGET_PAGE_BITS);
4001 if (!p) {
4002 pd = IO_MEM_UNASSIGNED;
4003 } else {
4004 pd = p->phys_offset;
4007 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4008 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4009 !(pd & IO_MEM_ROMD)) {
4010 /* do nothing */
4011 } else {
4012 unsigned long addr1;
4013 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4014 /* ROM/RAM case */
4015 ptr = qemu_get_ram_ptr(addr1);
4016 memcpy(ptr, buf, l);
4017 qemu_put_ram_ptr(ptr);
4019 len -= l;
4020 buf += l;
4021 addr += l;
4025 typedef struct {
4026 void *buffer;
4027 target_phys_addr_t addr;
4028 target_phys_addr_t len;
4029 } BounceBuffer;
4031 static BounceBuffer bounce;
4033 typedef struct MapClient {
4034 void *opaque;
4035 void (*callback)(void *opaque);
4036 QLIST_ENTRY(MapClient) link;
4037 } MapClient;
4039 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4040 = QLIST_HEAD_INITIALIZER(map_client_list);
4042 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4044 MapClient *client = g_malloc(sizeof(*client));
4046 client->opaque = opaque;
4047 client->callback = callback;
4048 QLIST_INSERT_HEAD(&map_client_list, client, link);
4049 return client;
4052 void cpu_unregister_map_client(void *_client)
4054 MapClient *client = (MapClient *)_client;
4056 QLIST_REMOVE(client, link);
4057 g_free(client);
4060 static void cpu_notify_map_clients(void)
4062 MapClient *client;
4064 while (!QLIST_EMPTY(&map_client_list)) {
4065 client = QLIST_FIRST(&map_client_list);
4066 client->callback(client->opaque);
4067 cpu_unregister_map_client(client);
4071 /* Map a physical memory region into a host virtual address.
4072 * May map a subset of the requested range, given by and returned in *plen.
4073 * May return NULL if resources needed to perform the mapping are exhausted.
4074 * Use only for reads OR writes - not for read-modify-write operations.
4075 * Use cpu_register_map_client() to know when retrying the map operation is
4076 * likely to succeed.
4078 void *cpu_physical_memory_map(target_phys_addr_t addr,
4079 target_phys_addr_t *plen,
4080 int is_write)
4082 target_phys_addr_t len = *plen;
4083 target_phys_addr_t todo = 0;
4084 int l;
4085 target_phys_addr_t page;
4086 unsigned long pd;
4087 PhysPageDesc *p;
4088 ram_addr_t raddr = RAM_ADDR_MAX;
4089 ram_addr_t rlen;
4090 void *ret;
4092 while (len > 0) {
4093 page = addr & TARGET_PAGE_MASK;
4094 l = (page + TARGET_PAGE_SIZE) - addr;
4095 if (l > len)
4096 l = len;
4097 p = phys_page_find(page >> TARGET_PAGE_BITS);
4098 if (!p) {
4099 pd = IO_MEM_UNASSIGNED;
4100 } else {
4101 pd = p->phys_offset;
4104 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4105 if (todo || bounce.buffer) {
4106 break;
4108 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4109 bounce.addr = addr;
4110 bounce.len = l;
4111 if (!is_write) {
4112 cpu_physical_memory_read(addr, bounce.buffer, l);
4115 *plen = l;
4116 return bounce.buffer;
4118 if (!todo) {
4119 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4122 len -= l;
4123 addr += l;
4124 todo += l;
4126 rlen = todo;
4127 ret = qemu_ram_ptr_length(raddr, &rlen);
4128 *plen = rlen;
4129 return ret;
4132 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4133 * Will also mark the memory as dirty if is_write == 1. access_len gives
4134 * the amount of memory that was actually read or written by the caller.
4136 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4137 int is_write, target_phys_addr_t access_len)
4139 if (buffer != bounce.buffer) {
4140 if (is_write) {
4141 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4142 while (access_len) {
4143 unsigned l;
4144 l = TARGET_PAGE_SIZE;
4145 if (l > access_len)
4146 l = access_len;
4147 if (!cpu_physical_memory_is_dirty(addr1)) {
4148 /* invalidate code */
4149 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4150 /* set dirty bit */
4151 cpu_physical_memory_set_dirty_flags(
4152 addr1, (0xff & ~CODE_DIRTY_FLAG));
4154 addr1 += l;
4155 access_len -= l;
4158 if (xen_enabled()) {
4159 xen_invalidate_map_cache_entry(buffer);
4161 return;
4163 if (is_write) {
4164 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4166 qemu_vfree(bounce.buffer);
4167 bounce.buffer = NULL;
4168 cpu_notify_map_clients();
4171 /* warning: addr must be aligned */
4172 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4173 enum device_endian endian)
4175 int io_index;
4176 uint8_t *ptr;
4177 uint32_t val;
4178 unsigned long pd;
4179 PhysPageDesc *p;
4181 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4182 if (!p) {
4183 pd = IO_MEM_UNASSIGNED;
4184 } else {
4185 pd = p->phys_offset;
4188 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4189 !(pd & IO_MEM_ROMD)) {
4190 /* I/O case */
4191 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4192 if (p)
4193 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4194 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4195 #if defined(TARGET_WORDS_BIGENDIAN)
4196 if (endian == DEVICE_LITTLE_ENDIAN) {
4197 val = bswap32(val);
4199 #else
4200 if (endian == DEVICE_BIG_ENDIAN) {
4201 val = bswap32(val);
4203 #endif
4204 } else {
4205 /* RAM case */
4206 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4207 (addr & ~TARGET_PAGE_MASK);
4208 switch (endian) {
4209 case DEVICE_LITTLE_ENDIAN:
4210 val = ldl_le_p(ptr);
4211 break;
4212 case DEVICE_BIG_ENDIAN:
4213 val = ldl_be_p(ptr);
4214 break;
4215 default:
4216 val = ldl_p(ptr);
4217 break;
4220 return val;
4223 uint32_t ldl_phys(target_phys_addr_t addr)
4225 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4228 uint32_t ldl_le_phys(target_phys_addr_t addr)
4230 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4233 uint32_t ldl_be_phys(target_phys_addr_t addr)
4235 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4238 /* warning: addr must be aligned */
4239 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4240 enum device_endian endian)
4242 int io_index;
4243 uint8_t *ptr;
4244 uint64_t val;
4245 unsigned long pd;
4246 PhysPageDesc *p;
4248 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4249 if (!p) {
4250 pd = IO_MEM_UNASSIGNED;
4251 } else {
4252 pd = p->phys_offset;
4255 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4256 !(pd & IO_MEM_ROMD)) {
4257 /* I/O case */
4258 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4259 if (p)
4260 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4262 /* XXX This is broken when device endian != cpu endian.
4263 Fix and add "endian" variable check */
4264 #ifdef TARGET_WORDS_BIGENDIAN
4265 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4266 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4267 #else
4268 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4269 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4270 #endif
4271 } else {
4272 /* RAM case */
4273 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4274 (addr & ~TARGET_PAGE_MASK);
4275 switch (endian) {
4276 case DEVICE_LITTLE_ENDIAN:
4277 val = ldq_le_p(ptr);
4278 break;
4279 case DEVICE_BIG_ENDIAN:
4280 val = ldq_be_p(ptr);
4281 break;
4282 default:
4283 val = ldq_p(ptr);
4284 break;
4287 return val;
4290 uint64_t ldq_phys(target_phys_addr_t addr)
4292 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4295 uint64_t ldq_le_phys(target_phys_addr_t addr)
4297 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4300 uint64_t ldq_be_phys(target_phys_addr_t addr)
4302 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4305 /* XXX: optimize */
4306 uint32_t ldub_phys(target_phys_addr_t addr)
4308 uint8_t val;
4309 cpu_physical_memory_read(addr, &val, 1);
4310 return val;
4313 /* warning: addr must be aligned */
4314 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4315 enum device_endian endian)
4317 int io_index;
4318 uint8_t *ptr;
4319 uint64_t val;
4320 unsigned long pd;
4321 PhysPageDesc *p;
4323 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4324 if (!p) {
4325 pd = IO_MEM_UNASSIGNED;
4326 } else {
4327 pd = p->phys_offset;
4330 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4331 !(pd & IO_MEM_ROMD)) {
4332 /* I/O case */
4333 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4334 if (p)
4335 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4336 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4337 #if defined(TARGET_WORDS_BIGENDIAN)
4338 if (endian == DEVICE_LITTLE_ENDIAN) {
4339 val = bswap16(val);
4341 #else
4342 if (endian == DEVICE_BIG_ENDIAN) {
4343 val = bswap16(val);
4345 #endif
4346 } else {
4347 /* RAM case */
4348 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4349 (addr & ~TARGET_PAGE_MASK);
4350 switch (endian) {
4351 case DEVICE_LITTLE_ENDIAN:
4352 val = lduw_le_p(ptr);
4353 break;
4354 case DEVICE_BIG_ENDIAN:
4355 val = lduw_be_p(ptr);
4356 break;
4357 default:
4358 val = lduw_p(ptr);
4359 break;
4362 return val;
4365 uint32_t lduw_phys(target_phys_addr_t addr)
4367 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4370 uint32_t lduw_le_phys(target_phys_addr_t addr)
4372 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4375 uint32_t lduw_be_phys(target_phys_addr_t addr)
4377 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4380 /* warning: addr must be aligned. The ram page is not masked as dirty
4381 and the code inside is not invalidated. It is useful if the dirty
4382 bits are used to track modified PTEs */
4383 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4385 int io_index;
4386 uint8_t *ptr;
4387 unsigned long pd;
4388 PhysPageDesc *p;
4390 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4391 if (!p) {
4392 pd = IO_MEM_UNASSIGNED;
4393 } else {
4394 pd = p->phys_offset;
4397 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4398 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4399 if (p)
4400 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4401 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4402 } else {
4403 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4404 ptr = qemu_get_ram_ptr(addr1);
4405 stl_p(ptr, val);
4407 if (unlikely(in_migration)) {
4408 if (!cpu_physical_memory_is_dirty(addr1)) {
4409 /* invalidate code */
4410 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4411 /* set dirty bit */
4412 cpu_physical_memory_set_dirty_flags(
4413 addr1, (0xff & ~CODE_DIRTY_FLAG));
4419 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4421 int io_index;
4422 uint8_t *ptr;
4423 unsigned long pd;
4424 PhysPageDesc *p;
4426 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4427 if (!p) {
4428 pd = IO_MEM_UNASSIGNED;
4429 } else {
4430 pd = p->phys_offset;
4433 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4434 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4435 if (p)
4436 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4437 #ifdef TARGET_WORDS_BIGENDIAN
4438 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4439 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4440 #else
4441 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4442 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4443 #endif
4444 } else {
4445 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4446 (addr & ~TARGET_PAGE_MASK);
4447 stq_p(ptr, val);
4451 /* warning: addr must be aligned */
4452 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4453 enum device_endian endian)
4455 int io_index;
4456 uint8_t *ptr;
4457 unsigned long pd;
4458 PhysPageDesc *p;
4460 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4461 if (!p) {
4462 pd = IO_MEM_UNASSIGNED;
4463 } else {
4464 pd = p->phys_offset;
4467 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4468 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4469 if (p)
4470 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4471 #if defined(TARGET_WORDS_BIGENDIAN)
4472 if (endian == DEVICE_LITTLE_ENDIAN) {
4473 val = bswap32(val);
4475 #else
4476 if (endian == DEVICE_BIG_ENDIAN) {
4477 val = bswap32(val);
4479 #endif
4480 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4481 } else {
4482 unsigned long addr1;
4483 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4484 /* RAM case */
4485 ptr = qemu_get_ram_ptr(addr1);
4486 switch (endian) {
4487 case DEVICE_LITTLE_ENDIAN:
4488 stl_le_p(ptr, val);
4489 break;
4490 case DEVICE_BIG_ENDIAN:
4491 stl_be_p(ptr, val);
4492 break;
4493 default:
4494 stl_p(ptr, val);
4495 break;
4497 if (!cpu_physical_memory_is_dirty(addr1)) {
4498 /* invalidate code */
4499 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4500 /* set dirty bit */
4501 cpu_physical_memory_set_dirty_flags(addr1,
4502 (0xff & ~CODE_DIRTY_FLAG));
4507 void stl_phys(target_phys_addr_t addr, uint32_t val)
4509 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4512 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4514 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4517 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4519 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4522 /* XXX: optimize */
4523 void stb_phys(target_phys_addr_t addr, uint32_t val)
4525 uint8_t v = val;
4526 cpu_physical_memory_write(addr, &v, 1);
4529 /* warning: addr must be aligned */
4530 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4531 enum device_endian endian)
4533 int io_index;
4534 uint8_t *ptr;
4535 unsigned long pd;
4536 PhysPageDesc *p;
4538 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4539 if (!p) {
4540 pd = IO_MEM_UNASSIGNED;
4541 } else {
4542 pd = p->phys_offset;
4545 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4546 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4547 if (p)
4548 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4549 #if defined(TARGET_WORDS_BIGENDIAN)
4550 if (endian == DEVICE_LITTLE_ENDIAN) {
4551 val = bswap16(val);
4553 #else
4554 if (endian == DEVICE_BIG_ENDIAN) {
4555 val = bswap16(val);
4557 #endif
4558 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4559 } else {
4560 unsigned long addr1;
4561 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4562 /* RAM case */
4563 ptr = qemu_get_ram_ptr(addr1);
4564 switch (endian) {
4565 case DEVICE_LITTLE_ENDIAN:
4566 stw_le_p(ptr, val);
4567 break;
4568 case DEVICE_BIG_ENDIAN:
4569 stw_be_p(ptr, val);
4570 break;
4571 default:
4572 stw_p(ptr, val);
4573 break;
4575 if (!cpu_physical_memory_is_dirty(addr1)) {
4576 /* invalidate code */
4577 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4578 /* set dirty bit */
4579 cpu_physical_memory_set_dirty_flags(addr1,
4580 (0xff & ~CODE_DIRTY_FLAG));
4585 void stw_phys(target_phys_addr_t addr, uint32_t val)
4587 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4590 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4592 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4595 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4597 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4600 /* XXX: optimize */
4601 void stq_phys(target_phys_addr_t addr, uint64_t val)
4603 val = tswap64(val);
4604 cpu_physical_memory_write(addr, &val, 8);
4607 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4609 val = cpu_to_le64(val);
4610 cpu_physical_memory_write(addr, &val, 8);
4613 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4615 val = cpu_to_be64(val);
4616 cpu_physical_memory_write(addr, &val, 8);
4619 /* virtual memory access for debug (includes writing to ROM) */
4620 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4621 uint8_t *buf, int len, int is_write)
4623 int l;
4624 target_phys_addr_t phys_addr;
4625 target_ulong page;
4627 while (len > 0) {
4628 page = addr & TARGET_PAGE_MASK;
4629 phys_addr = cpu_get_phys_page_debug(env, page);
4630 /* if no physical page mapped, return an error */
4631 if (phys_addr == -1)
4632 return -1;
4633 l = (page + TARGET_PAGE_SIZE) - addr;
4634 if (l > len)
4635 l = len;
4636 phys_addr += (addr & ~TARGET_PAGE_MASK);
4637 if (is_write)
4638 cpu_physical_memory_write_rom(phys_addr, buf, l);
4639 else
4640 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4641 len -= l;
4642 buf += l;
4643 addr += l;
4645 return 0;
4647 #endif
4649 /* in deterministic execution mode, instructions doing device I/Os
4650 must be at the end of the TB */
4651 void cpu_io_recompile(CPUState *env, void *retaddr)
4653 TranslationBlock *tb;
4654 uint32_t n, cflags;
4655 target_ulong pc, cs_base;
4656 uint64_t flags;
4658 tb = tb_find_pc((unsigned long)retaddr);
4659 if (!tb) {
4660 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4661 retaddr);
4663 n = env->icount_decr.u16.low + tb->icount;
4664 cpu_restore_state(tb, env, (unsigned long)retaddr);
4665 /* Calculate how many instructions had been executed before the fault
4666 occurred. */
4667 n = n - env->icount_decr.u16.low;
4668 /* Generate a new TB ending on the I/O insn. */
4669 n++;
4670 /* On MIPS and SH, delay slot instructions can only be restarted if
4671 they were already the first instruction in the TB. If this is not
4672 the first instruction in a TB then re-execute the preceding
4673 branch. */
4674 #if defined(TARGET_MIPS)
4675 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4676 env->active_tc.PC -= 4;
4677 env->icount_decr.u16.low++;
4678 env->hflags &= ~MIPS_HFLAG_BMASK;
4680 #elif defined(TARGET_SH4)
4681 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4682 && n > 1) {
4683 env->pc -= 2;
4684 env->icount_decr.u16.low++;
4685 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4687 #endif
4688 /* This should never happen. */
4689 if (n > CF_COUNT_MASK)
4690 cpu_abort(env, "TB too big during recompile");
4692 cflags = n | CF_LAST_IO;
4693 pc = tb->pc;
4694 cs_base = tb->cs_base;
4695 flags = tb->flags;
4696 tb_phys_invalidate(tb, -1);
4697 /* FIXME: In theory this could raise an exception. In practice
4698 we have already translated the block once so it's probably ok. */
4699 tb_gen_code(env, pc, cs_base, flags, cflags);
4700 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4701 the first in the TB) then we end up generating a whole new TB and
4702 repeating the fault, which is horribly inefficient.
4703 Better would be to execute just this insn uncached, or generate a
4704 second new TB. */
4705 cpu_resume_from_signal(env, NULL);
4708 #if !defined(CONFIG_USER_ONLY)
4710 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4712 int i, target_code_size, max_target_code_size;
4713 int direct_jmp_count, direct_jmp2_count, cross_page;
4714 TranslationBlock *tb;
4716 target_code_size = 0;
4717 max_target_code_size = 0;
4718 cross_page = 0;
4719 direct_jmp_count = 0;
4720 direct_jmp2_count = 0;
4721 for(i = 0; i < nb_tbs; i++) {
4722 tb = &tbs[i];
4723 target_code_size += tb->size;
4724 if (tb->size > max_target_code_size)
4725 max_target_code_size = tb->size;
4726 if (tb->page_addr[1] != -1)
4727 cross_page++;
4728 if (tb->tb_next_offset[0] != 0xffff) {
4729 direct_jmp_count++;
4730 if (tb->tb_next_offset[1] != 0xffff) {
4731 direct_jmp2_count++;
4735 /* XXX: avoid using doubles ? */
4736 cpu_fprintf(f, "Translation buffer state:\n");
4737 cpu_fprintf(f, "gen code size %td/%ld\n",
4738 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4739 cpu_fprintf(f, "TB count %d/%d\n",
4740 nb_tbs, code_gen_max_blocks);
4741 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4742 nb_tbs ? target_code_size / nb_tbs : 0,
4743 max_target_code_size);
4744 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4745 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4746 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4747 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4748 cross_page,
4749 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4750 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4751 direct_jmp_count,
4752 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4753 direct_jmp2_count,
4754 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4755 cpu_fprintf(f, "\nStatistics:\n");
4756 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4757 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4758 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4759 tcg_dump_info(f, cpu_fprintf);
4762 #define MMUSUFFIX _cmmu
4763 #undef GETPC
4764 #define GETPC() NULL
4765 #define env cpu_single_env
4766 #define SOFTMMU_CODE_ACCESS
4768 #define SHIFT 0
4769 #include "softmmu_template.h"
4771 #define SHIFT 1
4772 #include "softmmu_template.h"
4774 #define SHIFT 2
4775 #include "softmmu_template.h"
4777 #define SHIFT 3
4778 #include "softmmu_template.h"
4780 #undef env
4782 #endif