get_maintainer: update to match qemu tree
[qemu.git] / exec.c
blob81808f4f476365c27181b29a81199cd2725c2430
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "exec-all.h"
30 #include "tcg.h"
31 #include "hw/hw.h"
32 #include "hw/qdev.h"
33 #include "osdep.h"
34 #include "kvm.h"
35 #include "hw/xen.h"
36 #include "qemu-timer.h"
37 #if defined(CONFIG_USER_ONLY)
38 #include <qemu.h>
39 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
40 #include <sys/param.h>
41 #if __FreeBSD_version >= 700104
42 #define HAVE_KINFO_GETVMMAP
43 #define sigqueue sigqueue_freebsd /* avoid redefinition */
44 #include <sys/time.h>
45 #include <sys/proc.h>
46 #include <machine/profile.h>
47 #define _KERNEL
48 #include <sys/user.h>
49 #undef _KERNEL
50 #undef sigqueue
51 #include <libutil.h>
52 #endif
53 #endif
54 #else /* !CONFIG_USER_ONLY */
55 #include "xen-mapcache.h"
56 #endif
58 //#define DEBUG_TB_INVALIDATE
59 //#define DEBUG_FLUSH
60 //#define DEBUG_TLB
61 //#define DEBUG_UNASSIGNED
63 /* make various TB consistency checks */
64 //#define DEBUG_TB_CHECK
65 //#define DEBUG_TLB_CHECK
67 //#define DEBUG_IOPORT
68 //#define DEBUG_SUBPAGE
70 #if !defined(CONFIG_USER_ONLY)
71 /* TB consistency checks only implemented for usermode emulation. */
72 #undef DEBUG_TB_CHECK
73 #endif
75 #define SMC_BITMAP_USE_THRESHOLD 10
77 static TranslationBlock *tbs;
78 static int code_gen_max_blocks;
79 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
80 static int nb_tbs;
81 /* any access to the tbs or the page table must use this lock */
82 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
84 #if defined(__arm__) || defined(__sparc_v9__)
85 /* The prologue must be reachable with a direct jump. ARM and Sparc64
86 have limited branch ranges (possibly also PPC) so place it in a
87 section close to code segment. */
88 #define code_gen_section \
89 __attribute__((__section__(".gen_code"))) \
90 __attribute__((aligned (32)))
91 #elif defined(_WIN32)
92 /* Maximum alignment for Win32 is 16. */
93 #define code_gen_section \
94 __attribute__((aligned (16)))
95 #else
96 #define code_gen_section \
97 __attribute__((aligned (32)))
98 #endif
100 uint8_t code_gen_prologue[1024] code_gen_section;
101 static uint8_t *code_gen_buffer;
102 static unsigned long code_gen_buffer_size;
103 /* threshold to flush the translated code buffer */
104 static unsigned long code_gen_buffer_max_size;
105 static uint8_t *code_gen_ptr;
107 #if !defined(CONFIG_USER_ONLY)
108 int phys_ram_fd;
109 static int in_migration;
111 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
112 #endif
114 CPUState *first_cpu;
115 /* current CPU in the current thread. It is only valid inside
116 cpu_exec() */
117 CPUState *cpu_single_env;
118 /* 0 = Do not count executed instructions.
119 1 = Precise instruction counting.
120 2 = Adaptive rate instruction counting. */
121 int use_icount = 0;
122 /* Current instruction counter. While executing translated code this may
123 include some instructions that have not yet been executed. */
124 int64_t qemu_icount;
126 typedef struct PageDesc {
127 /* list of TBs intersecting this ram page */
128 TranslationBlock *first_tb;
129 /* in order to optimize self modifying code, we count the number
130 of lookups we do to a given page to use a bitmap */
131 unsigned int code_write_count;
132 uint8_t *code_bitmap;
133 #if defined(CONFIG_USER_ONLY)
134 unsigned long flags;
135 #endif
136 } PageDesc;
138 /* In system mode we want L1_MAP to be based on ram offsets,
139 while in user mode we want it to be based on virtual addresses. */
140 #if !defined(CONFIG_USER_ONLY)
141 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
142 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
143 #else
144 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
145 #endif
146 #else
147 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
148 #endif
150 /* Size of the L2 (and L3, etc) page tables. */
151 #define L2_BITS 10
152 #define L2_SIZE (1 << L2_BITS)
154 /* The bits remaining after N lower levels of page tables. */
155 #define P_L1_BITS_REM \
156 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
157 #define V_L1_BITS_REM \
158 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
160 /* Size of the L1 page table. Avoid silly small sizes. */
161 #if P_L1_BITS_REM < 4
162 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
163 #else
164 #define P_L1_BITS P_L1_BITS_REM
165 #endif
167 #if V_L1_BITS_REM < 4
168 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
169 #else
170 #define V_L1_BITS V_L1_BITS_REM
171 #endif
173 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
174 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
176 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
179 unsigned long qemu_real_host_page_size;
180 unsigned long qemu_host_page_bits;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageDesc {
190 /* offset in host memory of the page + io_index in the low bits */
191 ram_addr_t phys_offset;
192 ram_addr_t region_offset;
193 } PhysPageDesc;
195 /* This is a multi-level map on the physical address space.
196 The bottom level has pointers to PhysPageDesc. */
197 static void *l1_phys_map[P_L1_SIZE];
199 static void io_mem_init(void);
201 /* io memory support */
202 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
203 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
204 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
205 static char io_mem_used[IO_MEM_NB_ENTRIES];
206 static int io_mem_watch;
207 #endif
209 /* log support */
210 #ifdef WIN32
211 static const char *logfilename = "qemu.log";
212 #else
213 static const char *logfilename = "/tmp/qemu.log";
214 #endif
215 FILE *logfile;
216 int loglevel;
217 static int log_append = 0;
219 /* statistics */
220 #if !defined(CONFIG_USER_ONLY)
221 static int tlb_flush_count;
222 #endif
223 static int tb_flush_count;
224 static int tb_phys_invalidate_count;
/*
 * map_exec: request read/write/execute permission on the host memory
 * range [addr, addr + size).  The result of the underlying system call
 * is not inspected, so the operation is best-effort.
 */
#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous_protection);
}
#else
static void map_exec(void *addr, long size)
{
    unsigned long page_size = getpagesize();
    unsigned long page_mask = ~(page_size - 1);
    /* Widen the range outwards to whole host pages. */
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long last = ((unsigned long)addr + size + page_size - 1)
                         & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
252 static void page_init(void)
254 /* NOTE: we can always suppose that qemu_host_page_size >=
255 TARGET_PAGE_SIZE */
256 #ifdef _WIN32
258 SYSTEM_INFO system_info;
260 GetSystemInfo(&system_info);
261 qemu_real_host_page_size = system_info.dwPageSize;
263 #else
264 qemu_real_host_page_size = getpagesize();
265 #endif
266 if (qemu_host_page_size == 0)
267 qemu_host_page_size = qemu_real_host_page_size;
268 if (qemu_host_page_size < TARGET_PAGE_SIZE)
269 qemu_host_page_size = TARGET_PAGE_SIZE;
270 qemu_host_page_bits = 0;
271 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
272 qemu_host_page_bits++;
273 qemu_host_page_mask = ~(qemu_host_page_size - 1);
275 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
277 #ifdef HAVE_KINFO_GETVMMAP
278 struct kinfo_vmentry *freep;
279 int i, cnt;
281 freep = kinfo_getvmmap(getpid(), &cnt);
282 if (freep) {
283 mmap_lock();
284 for (i = 0; i < cnt; i++) {
285 unsigned long startaddr, endaddr;
287 startaddr = freep[i].kve_start;
288 endaddr = freep[i].kve_end;
289 if (h2g_valid(startaddr)) {
290 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
292 if (h2g_valid(endaddr)) {
293 endaddr = h2g(endaddr);
294 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
295 } else {
296 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
297 endaddr = ~0ul;
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 #endif
303 free(freep);
304 mmap_unlock();
306 #else
307 FILE *f;
309 last_brk = (unsigned long)sbrk(0);
311 f = fopen("/compat/linux/proc/self/maps", "r");
312 if (f) {
313 mmap_lock();
315 do {
316 unsigned long startaddr, endaddr;
317 int n;
319 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
321 if (n == 2 && h2g_valid(startaddr)) {
322 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
324 if (h2g_valid(endaddr)) {
325 endaddr = h2g(endaddr);
326 } else {
327 endaddr = ~0ul;
329 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
331 } while (!feof(f));
333 fclose(f);
334 mmap_unlock();
336 #endif
338 #endif
341 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
343 PageDesc *pd;
344 void **lp;
345 int i;
347 #if defined(CONFIG_USER_ONLY)
348 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
349 # define ALLOC(P, SIZE) \
350 do { \
351 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
352 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
353 } while (0)
354 #else
355 # define ALLOC(P, SIZE) \
356 do { P = qemu_mallocz(SIZE); } while (0)
357 #endif
359 /* Level 1. Always allocated. */
360 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
362 /* Level 2..N-1. */
363 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
364 void **p = *lp;
366 if (p == NULL) {
367 if (!alloc) {
368 return NULL;
370 ALLOC(p, sizeof(void *) * L2_SIZE);
371 *lp = p;
374 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
377 pd = *lp;
378 if (pd == NULL) {
379 if (!alloc) {
380 return NULL;
382 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
383 *lp = pd;
386 #undef ALLOC
388 return pd + (index & (L2_SIZE - 1));
391 static inline PageDesc *page_find(tb_page_addr_t index)
393 return page_find_alloc(index, 0);
396 #if !defined(CONFIG_USER_ONLY)
397 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
399 PhysPageDesc *pd;
400 void **lp;
401 int i;
403 /* Level 1. Always allocated. */
404 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
406 /* Level 2..N-1. */
407 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
408 void **p = *lp;
409 if (p == NULL) {
410 if (!alloc) {
411 return NULL;
413 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
415 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
418 pd = *lp;
419 if (pd == NULL) {
420 int i;
422 if (!alloc) {
423 return NULL;
426 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
428 for (i = 0; i < L2_SIZE; i++) {
429 pd[i].phys_offset = IO_MEM_UNASSIGNED;
430 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
434 return pd + (index & (L2_SIZE - 1));
437 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
439 return phys_page_find_alloc(index, 0);
442 static void tlb_protect_code(ram_addr_t ram_addr);
443 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
444 target_ulong vaddr);
445 #define mmap_lock() do { } while(0)
446 #define mmap_unlock() do { } while(0)
447 #endif
449 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
451 #if defined(CONFIG_USER_ONLY)
452 /* Currently it is not recommended to allocate big chunks of data in
453 user mode. It will change when a dedicated libc will be used */
454 #define USE_STATIC_CODE_GEN_BUFFER
455 #endif
457 #ifdef USE_STATIC_CODE_GEN_BUFFER
458 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
459 __attribute__((aligned (CODE_GEN_ALIGN)));
460 #endif
462 static void code_gen_alloc(unsigned long tb_size)
464 #ifdef USE_STATIC_CODE_GEN_BUFFER
465 code_gen_buffer = static_code_gen_buffer;
466 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
467 map_exec(code_gen_buffer, code_gen_buffer_size);
468 #else
469 code_gen_buffer_size = tb_size;
470 if (code_gen_buffer_size == 0) {
471 #if defined(CONFIG_USER_ONLY)
472 /* in user mode, phys_ram_size is not meaningful */
473 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
474 #else
475 /* XXX: needs adjustments */
476 code_gen_buffer_size = (unsigned long)(ram_size / 4);
477 #endif
479 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
480 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
481 /* The code gen buffer location may have constraints depending on
482 the host cpu and OS */
483 #if defined(__linux__)
485 int flags;
486 void *start = NULL;
488 flags = MAP_PRIVATE | MAP_ANONYMOUS;
489 #if defined(__x86_64__)
490 flags |= MAP_32BIT;
491 /* Cannot map more than that */
492 if (code_gen_buffer_size > (800 * 1024 * 1024))
493 code_gen_buffer_size = (800 * 1024 * 1024);
494 #elif defined(__sparc_v9__)
495 // Map the buffer below 2G, so we can use direct calls and branches
496 flags |= MAP_FIXED;
497 start = (void *) 0x60000000UL;
498 if (code_gen_buffer_size > (512 * 1024 * 1024))
499 code_gen_buffer_size = (512 * 1024 * 1024);
500 #elif defined(__arm__)
501 /* Map the buffer below 32M, so we can use direct calls and branches */
502 flags |= MAP_FIXED;
503 start = (void *) 0x01000000UL;
504 if (code_gen_buffer_size > 16 * 1024 * 1024)
505 code_gen_buffer_size = 16 * 1024 * 1024;
506 #elif defined(__s390x__)
507 /* Map the buffer so that we can use direct calls and branches. */
508 /* We have a +- 4GB range on the branches; leave some slop. */
509 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
510 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
512 start = (void *)0x90000000UL;
513 #endif
514 code_gen_buffer = mmap(start, code_gen_buffer_size,
515 PROT_WRITE | PROT_READ | PROT_EXEC,
516 flags, -1, 0);
517 if (code_gen_buffer == MAP_FAILED) {
518 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
519 exit(1);
522 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
523 || defined(__DragonFly__) || defined(__OpenBSD__)
525 int flags;
526 void *addr = NULL;
527 flags = MAP_PRIVATE | MAP_ANONYMOUS;
528 #if defined(__x86_64__)
529 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
530 * 0x40000000 is free */
531 flags |= MAP_FIXED;
532 addr = (void *)0x40000000;
533 /* Cannot map more than that */
534 if (code_gen_buffer_size > (800 * 1024 * 1024))
535 code_gen_buffer_size = (800 * 1024 * 1024);
536 #elif defined(__sparc_v9__)
537 // Map the buffer below 2G, so we can use direct calls and branches
538 flags |= MAP_FIXED;
539 addr = (void *) 0x60000000UL;
540 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
541 code_gen_buffer_size = (512 * 1024 * 1024);
543 #endif
544 code_gen_buffer = mmap(addr, code_gen_buffer_size,
545 PROT_WRITE | PROT_READ | PROT_EXEC,
546 flags, -1, 0);
547 if (code_gen_buffer == MAP_FAILED) {
548 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
549 exit(1);
552 #else
553 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
554 map_exec(code_gen_buffer, code_gen_buffer_size);
555 #endif
556 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
557 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
558 code_gen_buffer_max_size = code_gen_buffer_size -
559 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
560 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
561 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
564 /* Must be called before using the QEMU cpus. 'tb_size' is the size
565 (in bytes) allocated to the translation buffer. Zero means default
566 size. */
567 void cpu_exec_init_all(unsigned long tb_size)
569 cpu_gen_init();
570 code_gen_alloc(tb_size);
571 code_gen_ptr = code_gen_buffer;
572 page_init();
573 #if !defined(CONFIG_USER_ONLY)
574 io_mem_init();
575 #endif
576 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
577 /* There's no guest base to take into account, so go ahead and
578 initialize the prologue now. */
579 tcg_prologue_init(&tcg_ctx);
580 #endif
583 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
585 static int cpu_common_post_load(void *opaque, int version_id)
587 CPUState *env = opaque;
589 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
590 version_id is increased. */
591 env->interrupt_request &= ~0x01;
592 tlb_flush(env, 1);
594 return 0;
597 static const VMStateDescription vmstate_cpu_common = {
598 .name = "cpu_common",
599 .version_id = 1,
600 .minimum_version_id = 1,
601 .minimum_version_id_old = 1,
602 .post_load = cpu_common_post_load,
603 .fields = (VMStateField []) {
604 VMSTATE_UINT32(halted, CPUState),
605 VMSTATE_UINT32(interrupt_request, CPUState),
606 VMSTATE_END_OF_LIST()
609 #endif
611 CPUState *qemu_get_cpu(int cpu)
613 CPUState *env = first_cpu;
615 while (env) {
616 if (env->cpu_index == cpu)
617 break;
618 env = env->next_cpu;
621 return env;
624 void cpu_exec_init(CPUState *env)
626 CPUState **penv;
627 int cpu_index;
629 #if defined(CONFIG_USER_ONLY)
630 cpu_list_lock();
631 #endif
632 env->next_cpu = NULL;
633 penv = &first_cpu;
634 cpu_index = 0;
635 while (*penv != NULL) {
636 penv = &(*penv)->next_cpu;
637 cpu_index++;
639 env->cpu_index = cpu_index;
640 env->numa_node = 0;
641 QTAILQ_INIT(&env->breakpoints);
642 QTAILQ_INIT(&env->watchpoints);
643 #ifndef CONFIG_USER_ONLY
644 env->thread_id = qemu_get_thread_id();
645 #endif
646 *penv = env;
647 #if defined(CONFIG_USER_ONLY)
648 cpu_list_unlock();
649 #endif
650 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
651 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
652 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
653 cpu_save, cpu_load, env);
654 #endif
657 /* Allocate a new translation block. Flush the translation buffer if
658 too many translation blocks or too much generated code. */
659 static TranslationBlock *tb_alloc(target_ulong pc)
661 TranslationBlock *tb;
663 if (nb_tbs >= code_gen_max_blocks ||
664 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
665 return NULL;
666 tb = &tbs[nb_tbs++];
667 tb->pc = pc;
668 tb->cflags = 0;
669 return tb;
672 void tb_free(TranslationBlock *tb)
674 /* In practice this is mostly used for single use temporary TB
675 Ignore the hard cases and just back up if this TB happens to
676 be the last one generated. */
677 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
678 code_gen_ptr = tb->tc_ptr;
679 nb_tbs--;
683 static inline void invalidate_page_bitmap(PageDesc *p)
685 if (p->code_bitmap) {
686 qemu_free(p->code_bitmap);
687 p->code_bitmap = NULL;
689 p->code_write_count = 0;
692 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
694 static void page_flush_tb_1 (int level, void **lp)
696 int i;
698 if (*lp == NULL) {
699 return;
701 if (level == 0) {
702 PageDesc *pd = *lp;
703 for (i = 0; i < L2_SIZE; ++i) {
704 pd[i].first_tb = NULL;
705 invalidate_page_bitmap(pd + i);
707 } else {
708 void **pp = *lp;
709 for (i = 0; i < L2_SIZE; ++i) {
710 page_flush_tb_1 (level - 1, pp + i);
715 static void page_flush_tb(void)
717 int i;
718 for (i = 0; i < V_L1_SIZE; i++) {
719 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
723 /* flush all the translation blocks */
724 /* XXX: tb_flush is currently not thread safe */
725 void tb_flush(CPUState *env1)
727 CPUState *env;
728 #if defined(DEBUG_FLUSH)
729 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
730 (unsigned long)(code_gen_ptr - code_gen_buffer),
731 nb_tbs, nb_tbs > 0 ?
732 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
733 #endif
734 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
735 cpu_abort(env1, "Internal error: code buffer overflow\n");
737 nb_tbs = 0;
739 for(env = first_cpu; env != NULL; env = env->next_cpu) {
740 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
743 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
744 page_flush_tb();
746 code_gen_ptr = code_gen_buffer;
747 /* XXX: flush processor icache at this point if cache flush is
748 expensive */
749 tb_flush_count++;
752 #ifdef DEBUG_TB_CHECK
754 static void tb_invalidate_check(target_ulong address)
756 TranslationBlock *tb;
757 int i;
758 address &= TARGET_PAGE_MASK;
759 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
760 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
761 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
762 address >= tb->pc + tb->size)) {
763 printf("ERROR invalidate: address=" TARGET_FMT_lx
764 " PC=%08lx size=%04x\n",
765 address, (long)tb->pc, tb->size);
771 /* verify that all the pages have correct rights for code */
772 static void tb_page_check(void)
774 TranslationBlock *tb;
775 int i, flags1, flags2;
777 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
778 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
779 flags1 = page_get_flags(tb->pc);
780 flags2 = page_get_flags(tb->pc + tb->size - 1);
781 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
782 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
783 (long)tb->pc, tb->size, flags1, flags2);
789 #endif
791 /* invalidate one TB */
792 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
793 int next_offset)
795 TranslationBlock *tb1;
796 for(;;) {
797 tb1 = *ptb;
798 if (tb1 == tb) {
799 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
800 break;
802 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
806 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
808 TranslationBlock *tb1;
809 unsigned int n1;
811 for(;;) {
812 tb1 = *ptb;
813 n1 = (long)tb1 & 3;
814 tb1 = (TranslationBlock *)((long)tb1 & ~3);
815 if (tb1 == tb) {
816 *ptb = tb1->page_next[n1];
817 break;
819 ptb = &tb1->page_next[n1];
823 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
825 TranslationBlock *tb1, **ptb;
826 unsigned int n1;
828 ptb = &tb->jmp_next[n];
829 tb1 = *ptb;
830 if (tb1) {
831 /* find tb(n) in circular list */
832 for(;;) {
833 tb1 = *ptb;
834 n1 = (long)tb1 & 3;
835 tb1 = (TranslationBlock *)((long)tb1 & ~3);
836 if (n1 == n && tb1 == tb)
837 break;
838 if (n1 == 2) {
839 ptb = &tb1->jmp_first;
840 } else {
841 ptb = &tb1->jmp_next[n1];
844 /* now we can suppress tb(n) from the list */
845 *ptb = tb->jmp_next[n];
847 tb->jmp_next[n] = NULL;
851 /* reset the jump entry 'n' of a TB so that it is not chained to
852 another TB */
853 static inline void tb_reset_jump(TranslationBlock *tb, int n)
855 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
858 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
860 CPUState *env;
861 PageDesc *p;
862 unsigned int h, n1;
863 tb_page_addr_t phys_pc;
864 TranslationBlock *tb1, *tb2;
866 /* remove the TB from the hash list */
867 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
868 h = tb_phys_hash_func(phys_pc);
869 tb_remove(&tb_phys_hash[h], tb,
870 offsetof(TranslationBlock, phys_hash_next));
872 /* remove the TB from the page list */
873 if (tb->page_addr[0] != page_addr) {
874 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
875 tb_page_remove(&p->first_tb, tb);
876 invalidate_page_bitmap(p);
878 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
879 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
880 tb_page_remove(&p->first_tb, tb);
881 invalidate_page_bitmap(p);
884 tb_invalidated_flag = 1;
886 /* remove the TB from the hash list */
887 h = tb_jmp_cache_hash_func(tb->pc);
888 for(env = first_cpu; env != NULL; env = env->next_cpu) {
889 if (env->tb_jmp_cache[h] == tb)
890 env->tb_jmp_cache[h] = NULL;
893 /* suppress this TB from the two jump lists */
894 tb_jmp_remove(tb, 0);
895 tb_jmp_remove(tb, 1);
897 /* suppress any remaining jumps to this TB */
898 tb1 = tb->jmp_first;
899 for(;;) {
900 n1 = (long)tb1 & 3;
901 if (n1 == 2)
902 break;
903 tb1 = (TranslationBlock *)((long)tb1 & ~3);
904 tb2 = tb1->jmp_next[n1];
905 tb_reset_jump(tb1, n1);
906 tb1->jmp_next[n1] = NULL;
907 tb1 = tb2;
909 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
911 tb_phys_invalidate_count++;
/*
 * set_bits: set 'len' consecutive bits of the bitmap 'tab', starting at
 * bit index 'start'.  Bit i lives in byte i / 8 at position i % 8 (least
 * significant bit first).  Bits outside the range are left unchanged.
 *
 * A 'len' of zero or less sets nothing.
 */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    /* Without this guard a negative len that crosses a byte boundary
       would fall into the multi-byte branch and set bits. */
    if (len <= 0) {
        return;
    }

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* Range begins and ends in the same byte. */
        mask &= ~(0xff << (end & 7));
        *tab |= mask;
    } else {
        /* Leading partial byte. */
        *tab++ |= mask;
        start = (start + 8) & ~7;
        /* Whole bytes in the middle. */
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        /* Trailing partial byte, if any. */
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
941 static void build_page_bitmap(PageDesc *p)
943 int n, tb_start, tb_end;
944 TranslationBlock *tb;
946 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
948 tb = p->first_tb;
949 while (tb != NULL) {
950 n = (long)tb & 3;
951 tb = (TranslationBlock *)((long)tb & ~3);
952 /* NOTE: this is subtle as a TB may span two physical pages */
953 if (n == 0) {
954 /* NOTE: tb_end may be after the end of the page, but
955 it is not a problem */
956 tb_start = tb->pc & ~TARGET_PAGE_MASK;
957 tb_end = tb_start + tb->size;
958 if (tb_end > TARGET_PAGE_SIZE)
959 tb_end = TARGET_PAGE_SIZE;
960 } else {
961 tb_start = 0;
962 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
964 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
965 tb = tb->page_next[n];
969 TranslationBlock *tb_gen_code(CPUState *env,
970 target_ulong pc, target_ulong cs_base,
971 int flags, int cflags)
973 TranslationBlock *tb;
974 uint8_t *tc_ptr;
975 tb_page_addr_t phys_pc, phys_page2;
976 target_ulong virt_page2;
977 int code_gen_size;
979 phys_pc = get_page_addr_code(env, pc);
980 tb = tb_alloc(pc);
981 if (!tb) {
982 /* flush must be done */
983 tb_flush(env);
984 /* cannot fail at this point */
985 tb = tb_alloc(pc);
986 /* Don't forget to invalidate previous TB info. */
987 tb_invalidated_flag = 1;
989 tc_ptr = code_gen_ptr;
990 tb->tc_ptr = tc_ptr;
991 tb->cs_base = cs_base;
992 tb->flags = flags;
993 tb->cflags = cflags;
994 cpu_gen_code(env, tb, &code_gen_size);
995 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
997 /* check next page if needed */
998 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
999 phys_page2 = -1;
1000 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1001 phys_page2 = get_page_addr_code(env, virt_page2);
1003 tb_link_page(tb, phys_pc, phys_page2);
1004 return tb;
1007 /* invalidate all TBs which intersect with the target physical page
1008 starting in range [start;end[. NOTE: start and end must refer to
1009 the same physical page. 'is_cpu_write_access' should be true if called
1010 from a real cpu write access: the virtual CPU will exit the current
1011 TB if code is modified inside this TB. */
1012 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1013 int is_cpu_write_access)
1015 TranslationBlock *tb, *tb_next, *saved_tb;
1016 CPUState *env = cpu_single_env;
1017 tb_page_addr_t tb_start, tb_end;
1018 PageDesc *p;
1019 int n;
1020 #ifdef TARGET_HAS_PRECISE_SMC
1021 int current_tb_not_found = is_cpu_write_access;
1022 TranslationBlock *current_tb = NULL;
1023 int current_tb_modified = 0;
1024 target_ulong current_pc = 0;
1025 target_ulong current_cs_base = 0;
1026 int current_flags = 0;
1027 #endif /* TARGET_HAS_PRECISE_SMC */
1029 p = page_find(start >> TARGET_PAGE_BITS);
1030 if (!p)
1031 return;
1032 if (!p->code_bitmap &&
1033 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1034 is_cpu_write_access) {
1035 /* build code bitmap */
1036 build_page_bitmap(p);
1039 /* we remove all the TBs in the range [start, end[ */
1040 /* XXX: see if in some cases it could be faster to invalidate all the code */
1041 tb = p->first_tb;
1042 while (tb != NULL) {
1043 n = (long)tb & 3;
1044 tb = (TranslationBlock *)((long)tb & ~3);
1045 tb_next = tb->page_next[n];
1046 /* NOTE: this is subtle as a TB may span two physical pages */
1047 if (n == 0) {
1048 /* NOTE: tb_end may be after the end of the page, but
1049 it is not a problem */
1050 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1051 tb_end = tb_start + tb->size;
1052 } else {
1053 tb_start = tb->page_addr[1];
1054 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1056 if (!(tb_end <= start || tb_start >= end)) {
1057 #ifdef TARGET_HAS_PRECISE_SMC
1058 if (current_tb_not_found) {
1059 current_tb_not_found = 0;
1060 current_tb = NULL;
1061 if (env->mem_io_pc) {
1062 /* now we have a real cpu fault */
1063 current_tb = tb_find_pc(env->mem_io_pc);
1066 if (current_tb == tb &&
1067 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1068 /* If we are modifying the current TB, we must stop
1069 its execution. We could be more precise by checking
1070 that the modification is after the current PC, but it
1071 would require a specialized function to partially
1072 restore the CPU state */
1074 current_tb_modified = 1;
1075 cpu_restore_state(current_tb, env, env->mem_io_pc);
1076 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1077 &current_flags);
1079 #endif /* TARGET_HAS_PRECISE_SMC */
1080 /* we need to do that to handle the case where a signal
1081 occurs while doing tb_phys_invalidate() */
1082 saved_tb = NULL;
1083 if (env) {
1084 saved_tb = env->current_tb;
1085 env->current_tb = NULL;
1087 tb_phys_invalidate(tb, -1);
1088 if (env) {
1089 env->current_tb = saved_tb;
1090 if (env->interrupt_request && env->current_tb)
1091 cpu_interrupt(env, env->interrupt_request);
1094 tb = tb_next;
1096 #if !defined(CONFIG_USER_ONLY)
1097 /* if no code remaining, no need to continue to use slow writes */
1098 if (!p->first_tb) {
1099 invalidate_page_bitmap(p);
1100 if (is_cpu_write_access) {
1101 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1104 #endif
1105 #ifdef TARGET_HAS_PRECISE_SMC
1106 if (current_tb_modified) {
1107 /* we generate a block containing just the instruction
1108 modifying the memory. It will ensure that it cannot modify
1109 itself */
1110 env->current_tb = NULL;
1111 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1112 cpu_resume_from_signal(env, NULL);
1114 #endif
1117 /* len must be <= 8 and start must be a multiple of len */
1118 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1120 PageDesc *p;
1121 int offset, b;
1122 #if 0
1123 if (1) {
1124 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1125 cpu_single_env->mem_io_vaddr, len,
1126 cpu_single_env->eip,
1127 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1129 #endif
1130 p = page_find(start >> TARGET_PAGE_BITS);
1131 if (!p)
1132 return;
1133 if (p->code_bitmap) {
1134 offset = start & ~TARGET_PAGE_MASK;
1135 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1136 if (b & ((1 << len) - 1))
1137 goto do_invalidate;
1138 } else {
1139 do_invalidate:
1140 tb_invalidate_phys_page_range(start, start + len, 1);
1144 #if !defined(CONFIG_SOFTMMU)
1145 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1146 unsigned long pc, void *puc)
1148 TranslationBlock *tb;
1149 PageDesc *p;
1150 int n;
1151 #ifdef TARGET_HAS_PRECISE_SMC
1152 TranslationBlock *current_tb = NULL;
1153 CPUState *env = cpu_single_env;
1154 int current_tb_modified = 0;
1155 target_ulong current_pc = 0;
1156 target_ulong current_cs_base = 0;
1157 int current_flags = 0;
1158 #endif
1160 addr &= TARGET_PAGE_MASK;
1161 p = page_find(addr >> TARGET_PAGE_BITS);
1162 if (!p)
1163 return;
1164 tb = p->first_tb;
1165 #ifdef TARGET_HAS_PRECISE_SMC
1166 if (tb && pc != 0) {
1167 current_tb = tb_find_pc(pc);
1169 #endif
1170 while (tb != NULL) {
1171 n = (long)tb & 3;
1172 tb = (TranslationBlock *)((long)tb & ~3);
1173 #ifdef TARGET_HAS_PRECISE_SMC
1174 if (current_tb == tb &&
1175 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1176 /* If we are modifying the current TB, we must stop
1177 its execution. We could be more precise by checking
1178 that the modification is after the current PC, but it
1179 would require a specialized function to partially
1180 restore the CPU state */
1182 current_tb_modified = 1;
1183 cpu_restore_state(current_tb, env, pc);
1184 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1185 &current_flags);
1187 #endif /* TARGET_HAS_PRECISE_SMC */
1188 tb_phys_invalidate(tb, addr);
1189 tb = tb->page_next[n];
1191 p->first_tb = NULL;
1192 #ifdef TARGET_HAS_PRECISE_SMC
1193 if (current_tb_modified) {
1194 /* we generate a block containing just the instruction
1195 modifying the memory. It will ensure that it cannot modify
1196 itself */
1197 env->current_tb = NULL;
1198 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1199 cpu_resume_from_signal(env, puc);
1201 #endif
1203 #endif
1205 /* add the tb in the target page and protect it if necessary */
1206 static inline void tb_alloc_page(TranslationBlock *tb,
1207 unsigned int n, tb_page_addr_t page_addr)
1209 PageDesc *p;
1210 TranslationBlock *last_first_tb;
1212 tb->page_addr[n] = page_addr;
1213 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1214 tb->page_next[n] = p->first_tb;
1215 last_first_tb = p->first_tb;
1216 p->first_tb = (TranslationBlock *)((long)tb | n);
1217 invalidate_page_bitmap(p);
1219 #if defined(TARGET_HAS_SMC) || 1
1221 #if defined(CONFIG_USER_ONLY)
1222 if (p->flags & PAGE_WRITE) {
1223 target_ulong addr;
1224 PageDesc *p2;
1225 int prot;
1227 /* force the host page as non writable (writes will have a
1228 page fault + mprotect overhead) */
1229 page_addr &= qemu_host_page_mask;
1230 prot = 0;
1231 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1232 addr += TARGET_PAGE_SIZE) {
1234 p2 = page_find (addr >> TARGET_PAGE_BITS);
1235 if (!p2)
1236 continue;
1237 prot |= p2->flags;
1238 p2->flags &= ~PAGE_WRITE;
1240 mprotect(g2h(page_addr), qemu_host_page_size,
1241 (prot & PAGE_BITS) & ~PAGE_WRITE);
1242 #ifdef DEBUG_TB_INVALIDATE
1243 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1244 page_addr);
1245 #endif
1247 #else
1248 /* if some code is already present, then the pages are already
1249 protected. So we handle the case where only the first TB is
1250 allocated in a physical page */
1251 if (!last_first_tb) {
1252 tlb_protect_code(page_addr);
1254 #endif
1256 #endif /* TARGET_HAS_SMC */
1259 /* add a new TB and link it to the physical page tables. phys_page2 is
1260 (-1) to indicate that only one page contains the TB. */
1261 void tb_link_page(TranslationBlock *tb,
1262 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1264 unsigned int h;
1265 TranslationBlock **ptb;
1267 /* Grab the mmap lock to stop another thread invalidating this TB
1268 before we are done. */
1269 mmap_lock();
1270 /* add in the physical hash table */
1271 h = tb_phys_hash_func(phys_pc);
1272 ptb = &tb_phys_hash[h];
1273 tb->phys_hash_next = *ptb;
1274 *ptb = tb;
1276 /* add in the page list */
1277 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1278 if (phys_page2 != -1)
1279 tb_alloc_page(tb, 1, phys_page2);
1280 else
1281 tb->page_addr[1] = -1;
1283 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1284 tb->jmp_next[0] = NULL;
1285 tb->jmp_next[1] = NULL;
1287 /* init original jump addresses */
1288 if (tb->tb_next_offset[0] != 0xffff)
1289 tb_reset_jump(tb, 0);
1290 if (tb->tb_next_offset[1] != 0xffff)
1291 tb_reset_jump(tb, 1);
1293 #ifdef DEBUG_TB_CHECK
1294 tb_page_check();
1295 #endif
1296 mmap_unlock();
1299 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1300 tb[1].tc_ptr. Return NULL if not found */
1301 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1303 int m_min, m_max, m;
1304 unsigned long v;
1305 TranslationBlock *tb;
1307 if (nb_tbs <= 0)
1308 return NULL;
1309 if (tc_ptr < (unsigned long)code_gen_buffer ||
1310 tc_ptr >= (unsigned long)code_gen_ptr)
1311 return NULL;
1312 /* binary search (cf Knuth) */
1313 m_min = 0;
1314 m_max = nb_tbs - 1;
1315 while (m_min <= m_max) {
1316 m = (m_min + m_max) >> 1;
1317 tb = &tbs[m];
1318 v = (unsigned long)tb->tc_ptr;
1319 if (v == tc_ptr)
1320 return tb;
1321 else if (tc_ptr < v) {
1322 m_max = m - 1;
1323 } else {
1324 m_min = m + 1;
1327 return &tbs[m_max];
1330 static void tb_reset_jump_recursive(TranslationBlock *tb);
1332 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1334 TranslationBlock *tb1, *tb_next, **ptb;
1335 unsigned int n1;
1337 tb1 = tb->jmp_next[n];
1338 if (tb1 != NULL) {
1339 /* find head of list */
1340 for(;;) {
1341 n1 = (long)tb1 & 3;
1342 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1343 if (n1 == 2)
1344 break;
1345 tb1 = tb1->jmp_next[n1];
1347 /* we are now sure now that tb jumps to tb1 */
1348 tb_next = tb1;
1350 /* remove tb from the jmp_first list */
1351 ptb = &tb_next->jmp_first;
1352 for(;;) {
1353 tb1 = *ptb;
1354 n1 = (long)tb1 & 3;
1355 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1356 if (n1 == n && tb1 == tb)
1357 break;
1358 ptb = &tb1->jmp_next[n1];
1360 *ptb = tb->jmp_next[n];
1361 tb->jmp_next[n] = NULL;
1363 /* suppress the jump to next tb in generated code */
1364 tb_reset_jump(tb, n);
1366 /* suppress jumps in the tb on which we could have jumped */
1367 tb_reset_jump_recursive(tb_next);
1371 static void tb_reset_jump_recursive(TranslationBlock *tb)
1373 tb_reset_jump_recursive2(tb, 0);
1374 tb_reset_jump_recursive2(tb, 1);
1377 #if defined(TARGET_HAS_ICE)
1378 #if defined(CONFIG_USER_ONLY)
1379 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1381 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1383 #else
1384 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1386 target_phys_addr_t addr;
1387 target_ulong pd;
1388 ram_addr_t ram_addr;
1389 PhysPageDesc *p;
1391 addr = cpu_get_phys_page_debug(env, pc);
1392 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1393 if (!p) {
1394 pd = IO_MEM_UNASSIGNED;
1395 } else {
1396 pd = p->phys_offset;
1398 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1399 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1401 #endif
1402 #endif /* TARGET_HAS_ICE */
1404 #if defined(CONFIG_USER_ONLY)
1405 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1410 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1411 int flags, CPUWatchpoint **watchpoint)
1413 return -ENOSYS;
1415 #else
1416 /* Add a watchpoint. */
1417 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1418 int flags, CPUWatchpoint **watchpoint)
1420 target_ulong len_mask = ~(len - 1);
1421 CPUWatchpoint *wp;
1423 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1424 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1425 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1426 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1427 return -EINVAL;
1429 wp = qemu_malloc(sizeof(*wp));
1431 wp->vaddr = addr;
1432 wp->len_mask = len_mask;
1433 wp->flags = flags;
1435 /* keep all GDB-injected watchpoints in front */
1436 if (flags & BP_GDB)
1437 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1438 else
1439 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1441 tlb_flush_page(env, addr);
1443 if (watchpoint)
1444 *watchpoint = wp;
1445 return 0;
1448 /* Remove a specific watchpoint. */
1449 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1450 int flags)
1452 target_ulong len_mask = ~(len - 1);
1453 CPUWatchpoint *wp;
1455 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1456 if (addr == wp->vaddr && len_mask == wp->len_mask
1457 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1458 cpu_watchpoint_remove_by_ref(env, wp);
1459 return 0;
1462 return -ENOENT;
1465 /* Remove a specific watchpoint by reference. */
1466 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1468 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1470 tlb_flush_page(env, watchpoint->vaddr);
1472 qemu_free(watchpoint);
1475 /* Remove all matching watchpoints. */
1476 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1478 CPUWatchpoint *wp, *next;
1480 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1481 if (wp->flags & mask)
1482 cpu_watchpoint_remove_by_ref(env, wp);
1485 #endif
1487 /* Add a breakpoint. */
1488 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1489 CPUBreakpoint **breakpoint)
1491 #if defined(TARGET_HAS_ICE)
1492 CPUBreakpoint *bp;
1494 bp = qemu_malloc(sizeof(*bp));
1496 bp->pc = pc;
1497 bp->flags = flags;
1499 /* keep all GDB-injected breakpoints in front */
1500 if (flags & BP_GDB)
1501 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1502 else
1503 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1505 breakpoint_invalidate(env, pc);
1507 if (breakpoint)
1508 *breakpoint = bp;
1509 return 0;
1510 #else
1511 return -ENOSYS;
1512 #endif
1515 /* Remove a specific breakpoint. */
1516 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1518 #if defined(TARGET_HAS_ICE)
1519 CPUBreakpoint *bp;
1521 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1522 if (bp->pc == pc && bp->flags == flags) {
1523 cpu_breakpoint_remove_by_ref(env, bp);
1524 return 0;
1527 return -ENOENT;
1528 #else
1529 return -ENOSYS;
1530 #endif
1533 /* Remove a specific breakpoint by reference. */
1534 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1536 #if defined(TARGET_HAS_ICE)
1537 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1539 breakpoint_invalidate(env, breakpoint->pc);
1541 qemu_free(breakpoint);
1542 #endif
1545 /* Remove all matching breakpoints. */
1546 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1548 #if defined(TARGET_HAS_ICE)
1549 CPUBreakpoint *bp, *next;
1551 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1552 if (bp->flags & mask)
1553 cpu_breakpoint_remove_by_ref(env, bp);
1555 #endif
1558 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1559 CPU loop after each instruction */
1560 void cpu_single_step(CPUState *env, int enabled)
1562 #if defined(TARGET_HAS_ICE)
1563 if (env->singlestep_enabled != enabled) {
1564 env->singlestep_enabled = enabled;
1565 if (kvm_enabled())
1566 kvm_update_guest_debug(env, 0);
1567 else {
1568 /* must flush all the translated code to avoid inconsistencies */
1569 /* XXX: only flush what is necessary */
1570 tb_flush(env);
1573 #endif
1576 /* enable or disable low levels log */
1577 void cpu_set_log(int log_flags)
1579 loglevel = log_flags;
1580 if (loglevel && !logfile) {
1581 logfile = fopen(logfilename, log_append ? "a" : "w");
1582 if (!logfile) {
1583 perror(logfilename);
1584 _exit(1);
1586 #if !defined(CONFIG_SOFTMMU)
1587 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1589 static char logfile_buf[4096];
1590 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1592 #elif !defined(_WIN32)
1593 /* Win32 doesn't support line-buffering and requires size >= 2 */
1594 setvbuf(logfile, NULL, _IOLBF, 0);
1595 #endif
1596 log_append = 1;
1598 if (!loglevel && logfile) {
1599 fclose(logfile);
1600 logfile = NULL;
1604 void cpu_set_log_filename(const char *filename)
1606 logfilename = strdup(filename);
1607 if (logfile) {
1608 fclose(logfile);
1609 logfile = NULL;
1611 cpu_set_log(loglevel);
1614 static void cpu_unlink_tb(CPUState *env)
1616 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1617 problem and hope the cpu will stop of its own accord. For userspace
1618 emulation this often isn't actually as bad as it sounds. Often
1619 signals are used primarily to interrupt blocking syscalls. */
1620 TranslationBlock *tb;
1621 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1623 spin_lock(&interrupt_lock);
1624 tb = env->current_tb;
1625 /* if the cpu is currently executing code, we must unlink it and
1626 all the potentially executing TB */
1627 if (tb) {
1628 env->current_tb = NULL;
1629 tb_reset_jump_recursive(tb);
1631 spin_unlock(&interrupt_lock);
1634 #ifndef CONFIG_USER_ONLY
1635 /* mask must never be zero, except for A20 change call */
1636 static void tcg_handle_interrupt(CPUState *env, int mask)
1638 int old_mask;
1640 old_mask = env->interrupt_request;
1641 env->interrupt_request |= mask;
1644 * If called from iothread context, wake the target cpu in
1645 * case its halted.
1647 if (!qemu_cpu_is_self(env)) {
1648 qemu_cpu_kick(env);
1649 return;
1652 if (use_icount) {
1653 env->icount_decr.u16.high = 0xffff;
1654 if (!can_do_io(env)
1655 && (mask & ~old_mask) != 0) {
1656 cpu_abort(env, "Raised interrupt while not in I/O function");
1658 } else {
1659 cpu_unlink_tb(env);
1663 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1665 #else /* CONFIG_USER_ONLY */
1667 void cpu_interrupt(CPUState *env, int mask)
1669 env->interrupt_request |= mask;
1670 cpu_unlink_tb(env);
1672 #endif /* CONFIG_USER_ONLY */
1674 void cpu_reset_interrupt(CPUState *env, int mask)
1676 env->interrupt_request &= ~mask;
1679 void cpu_exit(CPUState *env)
1681 env->exit_request = 1;
1682 cpu_unlink_tb(env);
1685 const CPULogItem cpu_log_items[] = {
1686 { CPU_LOG_TB_OUT_ASM, "out_asm",
1687 "show generated host assembly code for each compiled TB" },
1688 { CPU_LOG_TB_IN_ASM, "in_asm",
1689 "show target assembly code for each compiled TB" },
1690 { CPU_LOG_TB_OP, "op",
1691 "show micro ops for each compiled TB" },
1692 { CPU_LOG_TB_OP_OPT, "op_opt",
1693 "show micro ops "
1694 #ifdef TARGET_I386
1695 "before eflags optimization and "
1696 #endif
1697 "after liveness analysis" },
1698 { CPU_LOG_INT, "int",
1699 "show interrupts/exceptions in short format" },
1700 { CPU_LOG_EXEC, "exec",
1701 "show trace before each executed TB (lots of logs)" },
1702 { CPU_LOG_TB_CPU, "cpu",
1703 "show CPU state before block translation" },
1704 #ifdef TARGET_I386
1705 { CPU_LOG_PCALL, "pcall",
1706 "show protected mode far calls/returns/exceptions" },
1707 { CPU_LOG_RESET, "cpu_reset",
1708 "show CPU state before CPU resets" },
1709 #endif
1710 #ifdef DEBUG_IOPORT
1711 { CPU_LOG_IOPORT, "ioport",
1712 "show all i/o ports accesses" },
1713 #endif
1714 { 0, NULL, NULL },
1717 #ifndef CONFIG_USER_ONLY
1718 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1719 = QLIST_HEAD_INITIALIZER(memory_client_list);
1721 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1722 ram_addr_t size,
1723 ram_addr_t phys_offset,
1724 bool log_dirty)
1726 CPUPhysMemoryClient *client;
1727 QLIST_FOREACH(client, &memory_client_list, list) {
1728 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1732 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1733 target_phys_addr_t end)
1735 CPUPhysMemoryClient *client;
1736 QLIST_FOREACH(client, &memory_client_list, list) {
1737 int r = client->sync_dirty_bitmap(client, start, end);
1738 if (r < 0)
1739 return r;
1741 return 0;
1744 static int cpu_notify_migration_log(int enable)
1746 CPUPhysMemoryClient *client;
1747 QLIST_FOREACH(client, &memory_client_list, list) {
1748 int r = client->migration_log(client, enable);
1749 if (r < 0)
1750 return r;
1752 return 0;
1755 struct last_map {
1756 target_phys_addr_t start_addr;
1757 ram_addr_t size;
1758 ram_addr_t phys_offset;
1761 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1762 * address. Each intermediate table provides the next L2_BITs of guest
1763 * physical address space. The number of levels vary based on host and
1764 * guest configuration, making it efficient to build the final guest
1765 * physical address by seeding the L1 offset and shifting and adding in
1766 * each L2 offset as we recurse through them. */
1767 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1768 void **lp, target_phys_addr_t addr,
1769 struct last_map *map)
1771 int i;
1773 if (*lp == NULL) {
1774 return;
1776 if (level == 0) {
1777 PhysPageDesc *pd = *lp;
1778 addr <<= L2_BITS + TARGET_PAGE_BITS;
1779 for (i = 0; i < L2_SIZE; ++i) {
1780 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1781 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1783 if (map->size &&
1784 start_addr == map->start_addr + map->size &&
1785 pd[i].phys_offset == map->phys_offset + map->size) {
1787 map->size += TARGET_PAGE_SIZE;
1788 continue;
1789 } else if (map->size) {
1790 client->set_memory(client, map->start_addr,
1791 map->size, map->phys_offset, false);
1794 map->start_addr = start_addr;
1795 map->size = TARGET_PAGE_SIZE;
1796 map->phys_offset = pd[i].phys_offset;
1799 } else {
1800 void **pp = *lp;
1801 for (i = 0; i < L2_SIZE; ++i) {
1802 phys_page_for_each_1(client, level - 1, pp + i,
1803 (addr << L2_BITS) | i, map);
1808 static void phys_page_for_each(CPUPhysMemoryClient *client)
1810 int i;
1811 struct last_map map = { };
1813 for (i = 0; i < P_L1_SIZE; ++i) {
1814 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1815 l1_phys_map + i, i, &map);
1817 if (map.size) {
1818 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1819 false);
1823 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1825 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1826 phys_page_for_each(client);
1829 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1831 QLIST_REMOVE(client, list);
1833 #endif
/*
 * Compare the first 'n' bytes of 's1' against the whole string 's2'.
 *
 * Returns 1 when 's2' is exactly 'n' bytes long and those bytes equal the
 * first 'n' bytes of 's1'; returns 0 otherwise.  A negative 'n' never
 * matches.  's1' does not have to be NUL-terminated at 'n'.
 */
static int cmp1(const char *s1, int n, const char *s2)
{
    /* reject negative lengths explicitly instead of relying on the
       implicit int -> size_t conversion in the comparison below */
    if (n < 0 || strlen(s2) != (size_t)n) {
        return 0;
    }
    return memcmp(s1, s2, (size_t)n) == 0;
}
1842 /* takes a comma separated list of log masks. Return 0 if error. */
1843 int cpu_str_to_log_mask(const char *str)
1845 const CPULogItem *item;
1846 int mask;
1847 const char *p, *p1;
1849 p = str;
1850 mask = 0;
1851 for(;;) {
1852 p1 = strchr(p, ',');
1853 if (!p1)
1854 p1 = p + strlen(p);
1855 if(cmp1(p,p1-p,"all")) {
1856 for(item = cpu_log_items; item->mask != 0; item++) {
1857 mask |= item->mask;
1859 } else {
1860 for(item = cpu_log_items; item->mask != 0; item++) {
1861 if (cmp1(p, p1 - p, item->name))
1862 goto found;
1864 return 0;
1866 found:
1867 mask |= item->mask;
1868 if (*p1 != ',')
1869 break;
1870 p = p1 + 1;
1872 return mask;
1875 void cpu_abort(CPUState *env, const char *fmt, ...)
1877 va_list ap;
1878 va_list ap2;
1880 va_start(ap, fmt);
1881 va_copy(ap2, ap);
1882 fprintf(stderr, "qemu: fatal: ");
1883 vfprintf(stderr, fmt, ap);
1884 fprintf(stderr, "\n");
1885 #ifdef TARGET_I386
1886 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1887 #else
1888 cpu_dump_state(env, stderr, fprintf, 0);
1889 #endif
1890 if (qemu_log_enabled()) {
1891 qemu_log("qemu: fatal: ");
1892 qemu_log_vprintf(fmt, ap2);
1893 qemu_log("\n");
1894 #ifdef TARGET_I386
1895 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1896 #else
1897 log_cpu_state(env, 0);
1898 #endif
1899 qemu_log_flush();
1900 qemu_log_close();
1902 va_end(ap2);
1903 va_end(ap);
1904 #if defined(CONFIG_USER_ONLY)
1906 struct sigaction act;
1907 sigfillset(&act.sa_mask);
1908 act.sa_handler = SIG_DFL;
1909 sigaction(SIGABRT, &act, NULL);
1911 #endif
1912 abort();
1915 CPUState *cpu_copy(CPUState *env)
1917 CPUState *new_env = cpu_init(env->cpu_model_str);
1918 CPUState *next_cpu = new_env->next_cpu;
1919 int cpu_index = new_env->cpu_index;
1920 #if defined(TARGET_HAS_ICE)
1921 CPUBreakpoint *bp;
1922 CPUWatchpoint *wp;
1923 #endif
1925 memcpy(new_env, env, sizeof(CPUState));
1927 /* Preserve chaining and index. */
1928 new_env->next_cpu = next_cpu;
1929 new_env->cpu_index = cpu_index;
1931 /* Clone all break/watchpoints.
1932 Note: Once we support ptrace with hw-debug register access, make sure
1933 BP_CPU break/watchpoints are handled correctly on clone. */
1934 QTAILQ_INIT(&env->breakpoints);
1935 QTAILQ_INIT(&env->watchpoints);
1936 #if defined(TARGET_HAS_ICE)
1937 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1938 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1940 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1941 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1942 wp->flags, NULL);
1944 #endif
1946 return new_env;
1949 #if !defined(CONFIG_USER_ONLY)
1951 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1953 unsigned int i;
1955 /* Discard jump cache entries for any tb which might potentially
1956 overlap the flushed page. */
1957 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1958 memset (&env->tb_jmp_cache[i], 0,
1959 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1961 i = tb_jmp_cache_hash_page(addr);
1962 memset (&env->tb_jmp_cache[i], 0,
1963 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1966 static CPUTLBEntry s_cputlb_empty_entry = {
1967 .addr_read = -1,
1968 .addr_write = -1,
1969 .addr_code = -1,
1970 .addend = -1,
1973 /* NOTE: if flush_global is true, also flush global entries (not
1974 implemented yet) */
1975 void tlb_flush(CPUState *env, int flush_global)
1977 int i;
1979 #if defined(DEBUG_TLB)
1980 printf("tlb_flush:\n");
1981 #endif
1982 /* must reset current TB so that interrupts cannot modify the
1983 links while we are modifying them */
1984 env->current_tb = NULL;
1986 for(i = 0; i < CPU_TLB_SIZE; i++) {
1987 int mmu_idx;
1988 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1989 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1993 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1995 env->tlb_flush_addr = -1;
1996 env->tlb_flush_mask = 0;
1997 tlb_flush_count++;
2000 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2002 if (addr == (tlb_entry->addr_read &
2003 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2004 addr == (tlb_entry->addr_write &
2005 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2006 addr == (tlb_entry->addr_code &
2007 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2008 *tlb_entry = s_cputlb_empty_entry;
2012 void tlb_flush_page(CPUState *env, target_ulong addr)
2014 int i;
2015 int mmu_idx;
2017 #if defined(DEBUG_TLB)
2018 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2019 #endif
2020 /* Check if we need to flush due to large pages. */
2021 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2022 #if defined(DEBUG_TLB)
2023 printf("tlb_flush_page: forced full flush ("
2024 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2025 env->tlb_flush_addr, env->tlb_flush_mask);
2026 #endif
2027 tlb_flush(env, 1);
2028 return;
2030 /* must reset current TB so that interrupts cannot modify the
2031 links while we are modifying them */
2032 env->current_tb = NULL;
2034 addr &= TARGET_PAGE_MASK;
2035 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2036 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2037 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2039 tlb_flush_jmp_cache(env, addr);
2042 /* update the TLBs so that writes to code in the virtual page 'addr'
2043 can be detected */
2044 static void tlb_protect_code(ram_addr_t ram_addr)
2046 cpu_physical_memory_reset_dirty(ram_addr,
2047 ram_addr + TARGET_PAGE_SIZE,
2048 CODE_DIRTY_FLAG);
2051 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2052 tested for self modifying code */
2053 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2054 target_ulong vaddr)
2056 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2059 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2060 unsigned long start, unsigned long length)
2062 unsigned long addr;
2063 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2064 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2065 if ((addr - start) < length) {
2066 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2071 /* Note: start and end must be within the same ram block. */
2072 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2073 int dirty_flags)
2075 CPUState *env;
2076 unsigned long length, start1;
2077 int i;
2079 start &= TARGET_PAGE_MASK;
2080 end = TARGET_PAGE_ALIGN(end);
2082 length = end - start;
2083 if (length == 0)
2084 return;
2085 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2087 /* we modify the TLB cache so that the dirty bit will be set again
2088 when accessing the range */
2089 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2090 /* Check that we don't span multiple blocks - this breaks the
2091 address comparisons below. */
2092 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2093 != (end - 1) - start) {
2094 abort();
2097 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2098 int mmu_idx;
2099 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2100 for(i = 0; i < CPU_TLB_SIZE; i++)
2101 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2102 start1, length);
2107 int cpu_physical_memory_set_dirty_tracking(int enable)
2109 int ret = 0;
2110 in_migration = enable;
2111 ret = cpu_notify_migration_log(!!enable);
2112 return ret;
2115 int cpu_physical_memory_get_dirty_tracking(void)
2117 return in_migration;
2120 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2121 target_phys_addr_t end_addr)
2123 int ret;
2125 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2126 return ret;
2129 int cpu_physical_log_start(target_phys_addr_t start_addr,
2130 ram_addr_t size)
2132 CPUPhysMemoryClient *client;
2133 QLIST_FOREACH(client, &memory_client_list, list) {
2134 if (client->log_start) {
2135 int r = client->log_start(client, start_addr, size);
2136 if (r < 0) {
2137 return r;
2141 return 0;
2144 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2145 ram_addr_t size)
2147 CPUPhysMemoryClient *client;
2148 QLIST_FOREACH(client, &memory_client_list, list) {
2149 if (client->log_stop) {
2150 int r = client->log_stop(client, start_addr, size);
2151 if (r < 0) {
2152 return r;
2156 return 0;
2159 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2161 ram_addr_t ram_addr;
2162 void *p;
2164 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2165 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2166 + tlb_entry->addend);
2167 ram_addr = qemu_ram_addr_from_host_nofail(p);
2168 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2169 tlb_entry->addr_write |= TLB_NOTDIRTY;
2174 /* update the TLB according to the current state of the dirty bits */
2175 void cpu_tlb_update_dirty(CPUState *env)
2177 int i;
2178 int mmu_idx;
2179 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2180 for(i = 0; i < CPU_TLB_SIZE; i++)
2181 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2185 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2187 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2188 tlb_entry->addr_write = vaddr;
2191 /* update the TLB corresponding to virtual page vaddr
2192 so that it is no longer dirty */
2193 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2195 int i;
2196 int mmu_idx;
2198 vaddr &= TARGET_PAGE_MASK;
2199 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2200 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2201 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2204 /* Our TLB does not support large pages, so remember the area covered by
2205 large pages and trigger a full TLB flush if these are invalidated. */
2206 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2207 target_ulong size)
2209 target_ulong mask = ~(size - 1);
2211 if (env->tlb_flush_addr == (target_ulong)-1) {
2212 env->tlb_flush_addr = vaddr & mask;
2213 env->tlb_flush_mask = mask;
2214 return;
2216 /* Extend the existing region to include the new page.
2217 This is a compromise between unnecessary flushes and the cost
2218 of maintaining a full variable size TLB. */
2219 mask &= env->tlb_flush_mask;
2220 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2221 mask <<= 1;
2223 env->tlb_flush_addr &= mask;
2224 env->tlb_flush_mask = mask;
2227 /* Add a new TLB entry. At most one entry for a given virtual address
2228 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2229 supplied size is only used by tlb_flush_page. */
2230 void tlb_set_page(CPUState *env, target_ulong vaddr,
2231 target_phys_addr_t paddr, int prot,
2232 int mmu_idx, target_ulong size)
2234 PhysPageDesc *p;
2235 unsigned long pd;
2236 unsigned int index;
2237 target_ulong address;
2238 target_ulong code_address;
2239 unsigned long addend;
2240 CPUTLBEntry *te;
2241 CPUWatchpoint *wp;
2242 target_phys_addr_t iotlb;
2244 assert(size >= TARGET_PAGE_SIZE);
2245 if (size != TARGET_PAGE_SIZE) {
2246 tlb_add_large_page(env, vaddr, size);
2248 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2249 if (!p) {
2250 pd = IO_MEM_UNASSIGNED;
2251 } else {
2252 pd = p->phys_offset;
2254 #if defined(DEBUG_TLB)
2255 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2256 " prot=%x idx=%d pd=0x%08lx\n",
2257 vaddr, paddr, prot, mmu_idx, pd);
2258 #endif
2260 address = vaddr;
2261 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2262 /* IO memory case (romd handled later) */
2263 address |= TLB_MMIO;
2265 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2266 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2267 /* Normal RAM. */
2268 iotlb = pd & TARGET_PAGE_MASK;
2269 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2270 iotlb |= IO_MEM_NOTDIRTY;
2271 else
2272 iotlb |= IO_MEM_ROM;
2273 } else {
2274 /* IO handlers are currently passed a physical address.
2275 It would be nice to pass an offset from the base address
2276 of that region. This would avoid having to special case RAM,
2277 and avoid full address decoding in every device.
2278 We can't use the high bits of pd for this because
2279 IO_MEM_ROMD uses these as a ram address. */
2280 iotlb = (pd & ~TARGET_PAGE_MASK);
2281 if (p) {
2282 iotlb += p->region_offset;
2283 } else {
2284 iotlb += paddr;
2288 code_address = address;
2289 /* Make accesses to pages with watchpoints go via the
2290 watchpoint trap routines. */
2291 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2292 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2293 /* Avoid trapping reads of pages with a write breakpoint. */
2294 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2295 iotlb = io_mem_watch + paddr;
2296 address |= TLB_MMIO;
2297 break;
2302 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2303 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2304 te = &env->tlb_table[mmu_idx][index];
2305 te->addend = addend - vaddr;
2306 if (prot & PAGE_READ) {
2307 te->addr_read = address;
2308 } else {
2309 te->addr_read = -1;
2312 if (prot & PAGE_EXEC) {
2313 te->addr_code = code_address;
2314 } else {
2315 te->addr_code = -1;
2317 if (prot & PAGE_WRITE) {
2318 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2319 (pd & IO_MEM_ROMD)) {
2320 /* Write access calls the I/O callback. */
2321 te->addr_write = address | TLB_MMIO;
2322 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2323 !cpu_physical_memory_is_dirty(pd)) {
2324 te->addr_write = address | TLB_NOTDIRTY;
2325 } else {
2326 te->addr_write = address;
2328 } else {
2329 te->addr_write = -1;
2333 #else
2335 void tlb_flush(CPUState *env, int flush_global)
2339 void tlb_flush_page(CPUState *env, target_ulong addr)
2344 * Walks guest process memory "regions" one by one
2345 * and calls callback function 'fn' for each region.
2348 struct walk_memory_regions_data
2350 walk_memory_regions_fn fn;
2351 void *priv;
2352 unsigned long start;
2353 int prot;
2356 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2357 abi_ulong end, int new_prot)
2359 if (data->start != -1ul) {
2360 int rc = data->fn(data->priv, data->start, end, data->prot);
2361 if (rc != 0) {
2362 return rc;
2366 data->start = (new_prot ? end : -1ul);
2367 data->prot = new_prot;
2369 return 0;
2372 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2373 abi_ulong base, int level, void **lp)
2375 abi_ulong pa;
2376 int i, rc;
2378 if (*lp == NULL) {
2379 return walk_memory_regions_end(data, base, 0);
2382 if (level == 0) {
2383 PageDesc *pd = *lp;
2384 for (i = 0; i < L2_SIZE; ++i) {
2385 int prot = pd[i].flags;
2387 pa = base | (i << TARGET_PAGE_BITS);
2388 if (prot != data->prot) {
2389 rc = walk_memory_regions_end(data, pa, prot);
2390 if (rc != 0) {
2391 return rc;
2395 } else {
2396 void **pp = *lp;
2397 for (i = 0; i < L2_SIZE; ++i) {
2398 pa = base | ((abi_ulong)i <<
2399 (TARGET_PAGE_BITS + L2_BITS * level));
2400 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2401 if (rc != 0) {
2402 return rc;
2407 return 0;
2410 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2412 struct walk_memory_regions_data data;
2413 unsigned long i;
2415 data.fn = fn;
2416 data.priv = priv;
2417 data.start = -1ul;
2418 data.prot = 0;
2420 for (i = 0; i < V_L1_SIZE; i++) {
2421 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2422 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2423 if (rc != 0) {
2424 return rc;
2428 return walk_memory_regions_end(&data, 0, 0);
2431 static int dump_region(void *priv, abi_ulong start,
2432 abi_ulong end, unsigned long prot)
2434 FILE *f = (FILE *)priv;
2436 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2437 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2438 start, end, end - start,
2439 ((prot & PAGE_READ) ? 'r' : '-'),
2440 ((prot & PAGE_WRITE) ? 'w' : '-'),
2441 ((prot & PAGE_EXEC) ? 'x' : '-'));
2443 return (0);
2446 /* dump memory mappings */
2447 void page_dump(FILE *f)
2449 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2450 "start", "end", "size", "prot");
2451 walk_memory_regions(f, dump_region);
2454 int page_get_flags(target_ulong address)
2456 PageDesc *p;
2458 p = page_find(address >> TARGET_PAGE_BITS);
2459 if (!p)
2460 return 0;
2461 return p->flags;
2464 /* Modify the flags of a page and invalidate the code if necessary.
2465 The flag PAGE_WRITE_ORG is positioned automatically depending
2466 on PAGE_WRITE. The mmap_lock should already be held. */
2467 void page_set_flags(target_ulong start, target_ulong end, int flags)
2469 target_ulong addr, len;
2471 /* This function should never be called with addresses outside the
2472 guest address space. If this assert fires, it probably indicates
2473 a missing call to h2g_valid. */
2474 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2475 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2476 #endif
2477 assert(start < end);
2479 start = start & TARGET_PAGE_MASK;
2480 end = TARGET_PAGE_ALIGN(end);
2482 if (flags & PAGE_WRITE) {
2483 flags |= PAGE_WRITE_ORG;
2486 for (addr = start, len = end - start;
2487 len != 0;
2488 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2489 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2491 /* If the write protection bit is set, then we invalidate
2492 the code inside. */
2493 if (!(p->flags & PAGE_WRITE) &&
2494 (flags & PAGE_WRITE) &&
2495 p->first_tb) {
2496 tb_invalidate_phys_page(addr, 0, NULL);
2498 p->flags = flags;
2502 int page_check_range(target_ulong start, target_ulong len, int flags)
2504 PageDesc *p;
2505 target_ulong end;
2506 target_ulong addr;
2508 /* This function should never be called with addresses outside the
2509 guest address space. If this assert fires, it probably indicates
2510 a missing call to h2g_valid. */
2511 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2512 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2513 #endif
2515 if (len == 0) {
2516 return 0;
2518 if (start + len - 1 < start) {
2519 /* We've wrapped around. */
2520 return -1;
2523 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2524 start = start & TARGET_PAGE_MASK;
2526 for (addr = start, len = end - start;
2527 len != 0;
2528 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2529 p = page_find(addr >> TARGET_PAGE_BITS);
2530 if( !p )
2531 return -1;
2532 if( !(p->flags & PAGE_VALID) )
2533 return -1;
2535 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2536 return -1;
2537 if (flags & PAGE_WRITE) {
2538 if (!(p->flags & PAGE_WRITE_ORG))
2539 return -1;
2540 /* unprotect the page if it was put read-only because it
2541 contains translated code */
2542 if (!(p->flags & PAGE_WRITE)) {
2543 if (!page_unprotect(addr, 0, NULL))
2544 return -1;
2546 return 0;
2549 return 0;
2552 /* called from signal handler: invalidate the code and unprotect the
2553 page. Return TRUE if the fault was successfully handled. */
2554 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2556 unsigned int prot;
2557 PageDesc *p;
2558 target_ulong host_start, host_end, addr;
2560 /* Technically this isn't safe inside a signal handler. However we
2561 know this only ever happens in a synchronous SEGV handler, so in
2562 practice it seems to be ok. */
2563 mmap_lock();
2565 p = page_find(address >> TARGET_PAGE_BITS);
2566 if (!p) {
2567 mmap_unlock();
2568 return 0;
2571 /* if the page was really writable, then we change its
2572 protection back to writable */
2573 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2574 host_start = address & qemu_host_page_mask;
2575 host_end = host_start + qemu_host_page_size;
2577 prot = 0;
2578 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2579 p = page_find(addr >> TARGET_PAGE_BITS);
2580 p->flags |= PAGE_WRITE;
2581 prot |= p->flags;
2583 /* and since the content will be modified, we must invalidate
2584 the corresponding translated code. */
2585 tb_invalidate_phys_page(addr, pc, puc);
2586 #ifdef DEBUG_TB_CHECK
2587 tb_invalidate_check(addr);
2588 #endif
2590 mprotect((void *)g2h(host_start), qemu_host_page_size,
2591 prot & PAGE_BITS);
2593 mmap_unlock();
2594 return 1;
2596 mmap_unlock();
2597 return 0;
/* tlb_set_dirty() as compiled in the CONFIG_USER_ONLY branch (see the
   #endif that follows).  The body is not visible in this extract --
   presumably an empty stub; confirm against the full file. */
2600 static inline void tlb_set_dirty(CPUState *env,
2601 unsigned long addr, target_ulong vaddr)
2604 #endif /* defined(CONFIG_USER_ONLY) */
2606 #if !defined(CONFIG_USER_ONLY)
/* Offset of 'addr' within its target page; used as the index into the
   per-byte tables of subpage_t below. */
2608 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* Descriptor of a page split between several I/O regions.  Both tables
   have one entry per byte offset in the page (TARGET_PAGE_SIZE entries). */
2609 typedef struct subpage_t {
/* page base address passed to subpage_init() */
2610 target_phys_addr_t base;
/* I/O memory table index serving each offset (set by subpage_register()) */
2611 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
/* value added to the access address before dispatching, per offset */
2612 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2613 } subpage_t;
2615 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2616 ram_addr_t memory, ram_addr_t region_offset);
2617 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2618 ram_addr_t orig_memory,
2619 ram_addr_t region_offset);
/* Compute which part of the page at 'addr' is covered by the range being
   registered.  start_addr2 / end_addr2 receive the first and last in-page
   offsets covered; need_subpage is set to 1 when the range does not cover
   the page completely (non-zero start offset, or end offset before the
   last byte of the page).
   NOTE: the macro reads 'orig_size' from the expansion site's scope, and
   its 'end_addr' parameter is not used in the visible lines. */
2620 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2621 need_subpage) \
2622 do { \
2623 if (addr > start_addr) \
2624 start_addr2 = 0; \
2625 else { \
2626 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2627 if (start_addr2 > 0) \
2628 need_subpage = 1; \
2631 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2632 end_addr2 = TARGET_PAGE_SIZE - 1; \
2633 else { \
2634 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2635 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2636 need_subpage = 1; \
2638 } while (0)
2640 /* register physical memory.
2641 For RAM, 'size' must be a multiple of the target page size.
2642 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2643 io memory page. The address used when calling the IO function is
2644 the offset from the start of the region, plus region_offset. Both
2645 start_addr and region_offset are rounded down to a page boundary
2646 before calculating this offset. This should not be a problem unless
2647 the low bits of start_addr and region_offset differ. */
/* Parameters:
     start_addr    - guest physical address where the region starts
     size          - region length in bytes; must be non-zero (asserted)
     phys_offset   - value stored in each page descriptor's phys_offset
     region_offset - per-region offset stored in / passed on for each page
     log_dirty     - forwarded unchanged to cpu_notify_set_memory()
   Every CPU's TLB is flushed before returning. */
2648 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2649 ram_addr_t size,
2650 ram_addr_t phys_offset,
2651 ram_addr_t region_offset,
2652 bool log_dirty)
2654 target_phys_addr_t addr, end_addr;
2655 PhysPageDesc *p;
2656 CPUState *env;
/* unrounded size; CHECK_SUBPAGE reads this variable by name */
2657 ram_addr_t orig_size = size;
2658 subpage_t *subpage;
2660 assert(size);
2661 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
/* for unassigned memory the region offset is the start address itself */
2663 if (phys_offset == IO_MEM_UNASSIGNED) {
2664 region_offset = start_addr;
/* round region_offset down and size up to whole target pages */
2666 region_offset &= TARGET_PAGE_MASK;
2667 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2668 end_addr = start_addr + (target_phys_addr_t)size;
/* process the range one target page at a time */
2670 addr = start_addr;
2671 do {
2672 p = phys_page_find(addr >> TARGET_PAGE_BITS);
/* case 1: the page already has a descriptor that is not unassigned */
2673 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2674 ram_addr_t orig_memory = p->phys_offset;
2675 target_phys_addr_t start_addr2, end_addr2;
2676 int need_subpage = 0;
2678 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2679 need_subpage);
2680 if (need_subpage) {
/* partial coverage: create a subpage, or reuse the one already installed */
2681 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2682 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2683 &p->phys_offset, orig_memory,
2684 p->region_offset);
2685 } else {
2686 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2687 >> IO_MEM_SHIFT];
2689 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2690 region_offset);
2691 p->region_offset = 0;
2692 } else {
/* full coverage: overwrite the mapping; for RAM/ROM/ROMD values advance
   phys_offset so the next page gets the following page of backing memory */
2693 p->phys_offset = phys_offset;
2694 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2695 (phys_offset & IO_MEM_ROMD))
2696 phys_offset += TARGET_PAGE_SIZE;
2698 } else {
/* case 2: no usable descriptor yet -- allocate one and fill it in */
2699 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2700 p->phys_offset = phys_offset;
2701 p->region_offset = region_offset;
2702 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2703 (phys_offset & IO_MEM_ROMD)) {
2704 phys_offset += TARGET_PAGE_SIZE;
2705 } else {
/* I/O memory: a partially covered page becomes a subpage whose remaining
   offsets start out as IO_MEM_UNASSIGNED */
2706 target_phys_addr_t start_addr2, end_addr2;
2707 int need_subpage = 0;
2709 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2710 end_addr2, need_subpage);
2712 if (need_subpage) {
2713 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2714 &p->phys_offset, IO_MEM_UNASSIGNED,
2715 addr & TARGET_PAGE_MASK);
2716 subpage_register(subpage, start_addr2, end_addr2,
2717 phys_offset, region_offset);
2718 p->region_offset = 0;
2722 region_offset += TARGET_PAGE_SIZE;
2723 addr += TARGET_PAGE_SIZE;
2724 } while (addr != end_addr);
2726 /* since each CPU stores ram addresses in its TLB cache, we must
2727 reset the modified entries */
2728 /* XXX: slow ! */
2729 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2730 tlb_flush(env, 1);
2734 /* XXX: temporary until new memory mapping API */
2735 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2737 PhysPageDesc *p;
2739 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2740 if (!p)
2741 return IO_MEM_UNASSIGNED;
2742 return p->phys_offset;
2745 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2747 if (kvm_enabled())
2748 kvm_coalesce_mmio_region(addr, size);
2751 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2753 if (kvm_enabled())
2754 kvm_uncoalesce_mmio_region(addr, size);
2757 void qemu_flush_coalesced_mmio_buffer(void)
2759 if (kvm_enabled())
2760 kvm_flush_coalesced_mmio_buffer();
2763 #if defined(__linux__) && !defined(TARGET_S390X)
2765 #include <sys/vfs.h>
/* f_type value that statfs() is compared against to detect hugetlbfs */
#define HUGETLBFS_MAGIC       0x958458f6

/*
 * Return the block size reported by statfs() for 'path'.  The call is
 * retried while it fails with EINTR.  On any other failure the error is
 * reported with perror() and 0 is returned.  A warning is printed (but
 * the size is still returned) when 'path' is not on hugetlbfs.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    for (;;) {
        ret = statfs(path, &fs);
        if (ret == 0 || errno != EINTR) {
            break;
        }
    }

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
2789 static void *file_ram_alloc(RAMBlock *block,
2790 ram_addr_t memory,
2791 const char *path)
2793 char *filename;
2794 void *area;
2795 int fd;
2796 #ifdef MAP_POPULATE
2797 int flags;
2798 #endif
2799 unsigned long hpagesize;
2801 hpagesize = gethugepagesize(path);
2802 if (!hpagesize) {
2803 return NULL;
2806 if (memory < hpagesize) {
2807 return NULL;
2810 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2811 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2812 return NULL;
2815 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2816 return NULL;
2819 fd = mkstemp(filename);
2820 if (fd < 0) {
2821 perror("unable to create backing store for hugepages");
2822 free(filename);
2823 return NULL;
2825 unlink(filename);
2826 free(filename);
2828 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2831 * ftruncate is not supported by hugetlbfs in older
2832 * hosts, so don't bother bailing out on errors.
2833 * If anything goes wrong with it under other filesystems,
2834 * mmap will fail.
2836 if (ftruncate(fd, memory))
2837 perror("ftruncate");
2839 #ifdef MAP_POPULATE
2840 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2841 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2842 * to sidestep this quirk.
2844 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2845 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2846 #else
2847 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2848 #endif
2849 if (area == MAP_FAILED) {
2850 perror("file_ram_alloc: can't mmap RAM pages");
2851 close(fd);
2852 return (NULL);
2854 block->fd = fd;
2855 return area;
2857 #endif
2859 static ram_addr_t find_ram_offset(ram_addr_t size)
2861 RAMBlock *block, *next_block;
2862 ram_addr_t offset = 0, mingap = ULONG_MAX;
2864 if (QLIST_EMPTY(&ram_list.blocks))
2865 return 0;
2867 QLIST_FOREACH(block, &ram_list.blocks, next) {
2868 ram_addr_t end, next = ULONG_MAX;
2870 end = block->offset + block->length;
2872 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2873 if (next_block->offset >= end) {
2874 next = MIN(next, next_block->offset);
2877 if (next - end >= size && next - end < mingap) {
2878 offset = end;
2879 mingap = next - end;
2882 return offset;
2885 static ram_addr_t last_ram_offset(void)
2887 RAMBlock *block;
2888 ram_addr_t last = 0;
2890 QLIST_FOREACH(block, &ram_list.blocks, next)
2891 last = MAX(last, block->offset + block->length);
2893 return last;
/* Create a new RAM block named 'name' (prefixed with the device path when
   the parent bus of 'dev' provides one) of 'size' bytes, rounded up to a
   target page.  If 'host' is non-NULL it is used as backing memory and the
   block is flagged RAM_PREALLOC_MASK; otherwise memory is obtained here.
   Aborts if a block with the same id string already exists.
   Returns the ram_addr_t offset assigned to the new block. */
2896 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2897 ram_addr_t size, void *host)
2899 RAMBlock *new_block, *block;
2901 size = TARGET_PAGE_ALIGN(size);
2902 new_block = qemu_mallocz(sizeof(*new_block));
/* build the id string: "<device path>/" (when available) followed by name */
2904 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2905 char *id = dev->parent_bus->info->get_dev_path(dev);
2906 if (id) {
2907 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2908 qemu_free(id);
2911 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
/* id strings must be unique across all registered blocks */
2913 QLIST_FOREACH(block, &ram_list.blocks, next) {
2914 if (!strcmp(block->idstr, new_block->idstr)) {
2915 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2916 new_block->idstr);
2917 abort();
2921 new_block->offset = find_ram_offset(size);
2922 if (host) {
/* caller-provided memory */
2923 new_block->host = host;
2924 new_block->flags |= RAM_PREALLOC_MASK;
2925 } else {
2926 if (mem_path) {
2927 #if defined (__linux__) && !defined(TARGET_S390X)
/* try file-backed memory first, fall back to qemu_vmalloc() on failure */
2928 new_block->host = file_ram_alloc(new_block, size, mem_path);
2929 if (!new_block->host) {
2930 new_block->host = qemu_vmalloc(size);
2931 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2933 #else
2934 fprintf(stderr, "-mem-path option unsupported\n");
2935 exit(1);
2936 #endif
2937 } else {
2938 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2939 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2940 an system defined value, which is at least 256GB. Larger systems
2941 have larger values. We put the guest between the end of data
2942 segment (system break) and this value. We use 32GB as a base to
2943 have enough room for the system break to grow. */
2944 new_block->host = mmap((void*)0x800000000, size,
2945 PROT_EXEC|PROT_READ|PROT_WRITE,
2946 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2947 if (new_block->host == MAP_FAILED) {
2948 fprintf(stderr, "Allocating RAM failed\n");
2949 abort();
2951 #else
/* with the Xen map cache, new_block->host is not assigned in this branch */
2952 if (xen_mapcache_enabled()) {
2953 xen_ram_alloc(new_block->offset, size);
2954 } else {
2955 new_block->host = qemu_vmalloc(size);
2957 #endif
2958 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2961 new_block->length = size;
2963 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
/* grow the dirty array to cover the new end of RAM (one byte per target
   page) and set all flags (0xff) for the pages of the new block */
2965 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2966 last_ram_offset() >> TARGET_PAGE_BITS);
2967 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2968 0xff, size >> TARGET_PAGE_BITS);
2970 if (kvm_enabled())
2971 kvm_setup_guest_memory(new_block->host, size);
2973 return new_block->offset;
/* Allocate a RAM block whose host memory is obtained by
   qemu_ram_alloc_from_ptr() itself (a NULL host pointer is passed).
   Returns the offset of the new block. */
2976 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2978 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2981 void qemu_ram_free_from_ptr(ram_addr_t addr)
2983 RAMBlock *block;
2985 QLIST_FOREACH(block, &ram_list.blocks, next) {
2986 if (addr == block->offset) {
2987 QLIST_REMOVE(block, next);
2988 qemu_free(block);
2989 return;
/* Remove the RAM block whose offset equals 'addr', release its host memory
   in the way that matches how it was obtained, and free the descriptor.
   Blocks flagged RAM_PREALLOC_MASK keep their host memory.
   Does nothing when no block matches. */
2994 void qemu_ram_free(ram_addr_t addr)
2996 RAMBlock *block;
2998 QLIST_FOREACH(block, &ram_list.blocks, next) {
2999 if (addr == block->offset) {
3000 QLIST_REMOVE(block, next);
3001 if (block->flags & RAM_PREALLOC_MASK) {
3003 } else if (mem_path) {
3004 #if defined (__linux__) && !defined(TARGET_S390X)
/* a non-zero fd marks a file-backed mapping; otherwise qemu_vfree() is used */
3005 if (block->fd) {
3006 munmap(block->host, block->length);
3007 close(block->fd);
3008 } else {
3009 qemu_vfree(block->host);
3011 #else
3012 abort();
3013 #endif
3014 } else {
3015 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3016 munmap(block->host, block->length);
3017 #else
3018 if (xen_mapcache_enabled()) {
3019 qemu_invalidate_entry(block->host);
3020 } else {
3021 qemu_vfree(block->host);
3023 #endif
3025 qemu_free(block);
3026 return;
3032 #ifndef _WIN32
3033 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3035 RAMBlock *block;
3036 ram_addr_t offset;
3037 int flags;
3038 void *area, *vaddr;
3040 QLIST_FOREACH(block, &ram_list.blocks, next) {
3041 offset = addr - block->offset;
3042 if (offset < block->length) {
3043 vaddr = block->host + offset;
3044 if (block->flags & RAM_PREALLOC_MASK) {
3046 } else {
3047 flags = MAP_FIXED;
3048 munmap(vaddr, length);
3049 if (mem_path) {
3050 #if defined(__linux__) && !defined(TARGET_S390X)
3051 if (block->fd) {
3052 #ifdef MAP_POPULATE
3053 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3054 MAP_PRIVATE;
3055 #else
3056 flags |= MAP_PRIVATE;
3057 #endif
3058 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3059 flags, block->fd, offset);
3060 } else {
3061 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3062 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3063 flags, -1, 0);
3065 #else
3066 abort();
3067 #endif
3068 } else {
3069 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3070 flags |= MAP_SHARED | MAP_ANONYMOUS;
3071 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3072 flags, -1, 0);
3073 #else
3074 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3075 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3076 flags, -1, 0);
3077 #endif
3079 if (area != vaddr) {
3080 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3081 length, addr);
3082 exit(1);
3084 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3086 return;
3090 #endif /* !_WIN32 */
3092 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3093 With the exception of the softmmu code in this file, this should
3094 only be used for local memory (e.g. video ram) that the device owns,
3095 and knows it isn't going to access beyond the end of the block.
3097 It should not be used for general purpose DMA.
3098 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3100 void *qemu_get_ram_ptr(ram_addr_t addr)
3102 RAMBlock *block;
3104 QLIST_FOREACH(block, &ram_list.blocks, next) {
3105 if (addr - block->offset < block->length) {
3106 /* Move this entry to to start of the list. */
3107 if (block != QLIST_FIRST(&ram_list.blocks)) {
3108 QLIST_REMOVE(block, next);
3109 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3111 if (xen_mapcache_enabled()) {
3112 /* We need to check if the requested address is in the RAM
3113 * because we don't want to map the entire memory in QEMU.
3115 if (block->offset == 0) {
3116 return qemu_map_cache(addr, 0, 1);
3117 } else if (block->host == NULL) {
3118 block->host = xen_map_block(block->offset, block->length);
3121 return block->host + (addr - block->offset);
3125 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3126 abort();
3128 return NULL;
3131 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3132 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3134 void *qemu_safe_ram_ptr(ram_addr_t addr)
3136 RAMBlock *block;
3138 QLIST_FOREACH(block, &ram_list.blocks, next) {
3139 if (addr - block->offset < block->length) {
3140 if (xen_mapcache_enabled()) {
3141 /* We need to check if the requested address is in the RAM
3142 * because we don't want to map the entire memory in QEMU.
3144 if (block->offset == 0) {
3145 return qemu_map_cache(addr, 0, 1);
3146 } else if (block->host == NULL) {
3147 block->host = xen_map_block(block->offset, block->length);
3150 return block->host + (addr - block->offset);
3154 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3155 abort();
3157 return NULL;
3160 void qemu_put_ram_ptr(void *addr)
3162 trace_qemu_put_ram_ptr(addr);
3164 if (xen_mapcache_enabled()) {
3165 RAMBlock *block;
3167 QLIST_FOREACH(block, &ram_list.blocks, next) {
3168 if (addr == block->host) {
3169 break;
3172 if (block && block->host) {
3173 xen_unmap_block(block->host, block->length);
3174 block->host = NULL;
3175 } else {
3176 qemu_map_cache_unlock(addr);
3181 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3183 RAMBlock *block;
3184 uint8_t *host = ptr;
3186 QLIST_FOREACH(block, &ram_list.blocks, next) {
3187 /* This case append when the block is not mapped. */
3188 if (block->host == NULL) {
3189 continue;
3191 if (host - block->host < block->length) {
3192 *ram_addr = block->offset + (host - block->host);
3193 return 0;
3197 if (xen_mapcache_enabled()) {
3198 *ram_addr = qemu_ram_addr_from_mapcache(ptr);
3199 return 0;
3202 return -1;
3205 /* Some of the softmmu routines need to translate from a host pointer
3206 (typically a TLB entry) back to a ram offset. */
3207 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3209 ram_addr_t ram_addr;
3211 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3212 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3213 abort();
3215 return ram_addr;
3218 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3220 #ifdef DEBUG_UNASSIGNED
3221 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3222 #endif
3223 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3224 do_unassigned_access(addr, 0, 0, 0, 1);
3225 #endif
3226 return 0;
3229 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3231 #ifdef DEBUG_UNASSIGNED
3232 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3233 #endif
3234 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3235 do_unassigned_access(addr, 0, 0, 0, 2);
3236 #endif
3237 return 0;
3240 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3242 #ifdef DEBUG_UNASSIGNED
3243 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3244 #endif
3245 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3246 do_unassigned_access(addr, 0, 0, 0, 4);
3247 #endif
3248 return 0;
3251 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3253 #ifdef DEBUG_UNASSIGNED
3254 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3255 #endif
3256 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3257 do_unassigned_access(addr, 1, 0, 0, 1);
3258 #endif
3261 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3263 #ifdef DEBUG_UNASSIGNED
3264 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3265 #endif
3266 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3267 do_unassigned_access(addr, 1, 0, 0, 2);
3268 #endif
3271 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3273 #ifdef DEBUG_UNASSIGNED
3274 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3275 #endif
3276 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3277 do_unassigned_access(addr, 1, 0, 0, 4);
3278 #endif
3281 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3282 unassigned_mem_readb,
3283 unassigned_mem_readw,
3284 unassigned_mem_readl,
3287 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3288 unassigned_mem_writeb,
3289 unassigned_mem_writew,
3290 unassigned_mem_writel,
3293 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3294 uint32_t val)
3296 int dirty_flags;
3297 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3298 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3299 #if !defined(CONFIG_USER_ONLY)
3300 tb_invalidate_phys_page_fast(ram_addr, 1);
3301 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3302 #endif
3304 stb_p(qemu_get_ram_ptr(ram_addr), val);
3305 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3306 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3307 /* we remove the notdirty callback only if the code has been
3308 flushed */
3309 if (dirty_flags == 0xff)
3310 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3313 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3314 uint32_t val)
3316 int dirty_flags;
3317 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3318 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3319 #if !defined(CONFIG_USER_ONLY)
3320 tb_invalidate_phys_page_fast(ram_addr, 2);
3321 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3322 #endif
3324 stw_p(qemu_get_ram_ptr(ram_addr), val);
3325 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3326 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3327 /* we remove the notdirty callback only if the code has been
3328 flushed */
3329 if (dirty_flags == 0xff)
3330 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3333 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3334 uint32_t val)
3336 int dirty_flags;
3337 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3338 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3339 #if !defined(CONFIG_USER_ONLY)
3340 tb_invalidate_phys_page_fast(ram_addr, 4);
3341 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3342 #endif
3344 stl_p(qemu_get_ram_ptr(ram_addr), val);
3345 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3346 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3347 /* we remove the notdirty callback only if the code has been
3348 flushed */
3349 if (dirty_flags == 0xff)
3350 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3353 static CPUReadMemoryFunc * const error_mem_read[3] = {
3354 NULL, /* never used */
3355 NULL, /* never used */
3356 NULL, /* never used */
3359 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3360 notdirty_mem_writeb,
3361 notdirty_mem_writew,
3362 notdirty_mem_writel,
3365 /* Generate a debug exception if a watchpoint has been hit. */
/* 'offset' is the in-page offset of the access, 'len_mask' masks off the
   low address bits covered by the access size, and 'flags' selects which
   watchpoint kinds (BP_MEM_READ / BP_MEM_WRITE) are considered.
   Operates on cpu_single_env. */
3366 static void check_watchpoint(int offset, int len_mask, int flags)
3368 CPUState *env = cpu_single_env;
3369 target_ulong pc, cs_base;
3370 TranslationBlock *tb;
3371 target_ulong vaddr;
3372 CPUWatchpoint *wp;
3373 int cpu_flags;
3375 if (env->watchpoint_hit) {
3376 /* We re-entered the check after replacing the TB. Now raise
3377 * the debug interrupt so that is will trigger after the
3378 * current instruction. */
3379 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3380 return;
/* rebuild the accessed virtual address from the page of the last I/O
   access and the in-page offset */
3382 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3383 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* hit if the access covers the watchpoint address or the watchpoint range
   covers the access, and the access kind matches the watchpoint flags */
3384 if ((vaddr == (wp->vaddr & len_mask) ||
3385 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3386 wp->flags |= BP_WATCHPOINT_HIT;
3387 if (!env->watchpoint_hit) {
3388 env->watchpoint_hit = wp;
3389 tb = tb_find_pc(env->mem_io_pc);
3390 if (!tb) {
3391 cpu_abort(env, "check_watchpoint: could not find TB for "
3392 "pc=%p", (void *)env->mem_io_pc);
/* restore the CPU state for the faulting pc and drop the current TB */
3394 cpu_restore_state(tb, env, env->mem_io_pc);
3395 tb_phys_invalidate(tb, -1);
3396 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3397 env->exception_index = EXCP_DEBUG;
3398 } else {
/* otherwise generate replacement code for the current pc; per the comment
   at the top of the function, the check is then re-entered */
3399 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3400 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
/* NOTE(review): presumably does not return to this function -- confirm */
3402 cpu_resume_from_signal(env, NULL);
3404 } else {
3405 wp->flags &= ~BP_WATCHPOINT_HIT;
3410 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3411 so these check for a hit then pass through to the normal out-of-line
3412 phys routines. */
3413 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3415 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3416 return ldub_phys(addr);
3419 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3421 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3422 return lduw_phys(addr);
3425 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3427 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3428 return ldl_phys(addr);
3431 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3432 uint32_t val)
3434 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3435 stb_phys(addr, val);
3438 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3439 uint32_t val)
3441 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3442 stw_phys(addr, val);
3445 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3446 uint32_t val)
3448 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3449 stl_phys(addr, val);
3452 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3453 watch_mem_readb,
3454 watch_mem_readw,
3455 watch_mem_readl,
3458 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3459 watch_mem_writeb,
3460 watch_mem_writew,
3461 watch_mem_writel,
3464 static inline uint32_t subpage_readlen (subpage_t *mmio,
3465 target_phys_addr_t addr,
3466 unsigned int len)
3468 unsigned int idx = SUBPAGE_IDX(addr);
3469 #if defined(DEBUG_SUBPAGE)
3470 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3471 mmio, len, addr, idx);
3472 #endif
3474 addr += mmio->region_offset[idx];
3475 idx = mmio->sub_io_index[idx];
3476 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3479 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3480 uint32_t value, unsigned int len)
3482 unsigned int idx = SUBPAGE_IDX(addr);
3483 #if defined(DEBUG_SUBPAGE)
3484 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3485 __func__, mmio, len, addr, idx, value);
3486 #endif
3488 addr += mmio->region_offset[idx];
3489 idx = mmio->sub_io_index[idx];
3490 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3493 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3495 return subpage_readlen(opaque, addr, 0);
3498 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3499 uint32_t value)
3501 subpage_writelen(opaque, addr, value, 0);
3504 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3506 return subpage_readlen(opaque, addr, 1);
3509 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3510 uint32_t value)
3512 subpage_writelen(opaque, addr, value, 1);
3515 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3517 return subpage_readlen(opaque, addr, 2);
3520 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3521 uint32_t value)
3523 subpage_writelen(opaque, addr, value, 2);
3526 static CPUReadMemoryFunc * const subpage_read[] = {
3527 &subpage_readb,
3528 &subpage_readw,
3529 &subpage_readl,
3532 static CPUWriteMemoryFunc * const subpage_write[] = {
3533 &subpage_writeb,
3534 &subpage_writew,
3535 &subpage_writel,
3538 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3539 ram_addr_t memory, ram_addr_t region_offset)
3541 int idx, eidx;
3543 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3544 return -1;
3545 idx = SUBPAGE_IDX(start);
3546 eidx = SUBPAGE_IDX(end);
3547 #if defined(DEBUG_SUBPAGE)
3548 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3549 mmio, start, end, idx, eidx, memory);
3550 #endif
3551 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3552 memory = IO_MEM_UNASSIGNED;
3553 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3554 for (; idx <= eidx; idx++) {
3555 mmio->sub_io_index[idx] = memory;
3556 mmio->region_offset[idx] = region_offset;
3559 return 0;
3562 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3563 ram_addr_t orig_memory,
3564 ram_addr_t region_offset)
3566 subpage_t *mmio;
3567 int subpage_memory;
3569 mmio = qemu_mallocz(sizeof(subpage_t));
3571 mmio->base = base;
3572 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3573 DEVICE_NATIVE_ENDIAN);
3574 #if defined(DEBUG_SUBPAGE)
3575 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3576 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3577 #endif
3578 *phys = subpage_memory | IO_MEM_SUBPAGE;
3579 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3581 return mmio;
3584 static int get_free_io_mem_idx(void)
3586 int i;
3588 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3589 if (!io_mem_used[i]) {
3590 io_mem_used[i] = 1;
3591 return i;
3593 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3594 return -1;
/*
 * Usually, devices operate in little endian mode. There are devices out
 * there that operate in big endian too. Each device gets byte swapped
 * mmio if plugged onto a CPU that does the other endianness.
 *
 * CPU          Device           swap?
 *
 * little       little           no
 * little       big              yes
 * big          little           yes
 * big          big              no
 */
3610 typedef struct SwapEndianContainer {
3611 CPUReadMemoryFunc *read[3];
3612 CPUWriteMemoryFunc *write[3];
3613 void *opaque;
3614 } SwapEndianContainer;
3616 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3618 uint32_t val;
3619 SwapEndianContainer *c = opaque;
3620 val = c->read[0](c->opaque, addr);
3621 return val;
3624 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3626 uint32_t val;
3627 SwapEndianContainer *c = opaque;
3628 val = bswap16(c->read[1](c->opaque, addr));
3629 return val;
3632 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3634 uint32_t val;
3635 SwapEndianContainer *c = opaque;
3636 val = bswap32(c->read[2](c->opaque, addr));
3637 return val;
3640 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3641 swapendian_mem_readb,
3642 swapendian_mem_readw,
3643 swapendian_mem_readl
3646 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3647 uint32_t val)
3649 SwapEndianContainer *c = opaque;
3650 c->write[0](c->opaque, addr, val);
3653 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3654 uint32_t val)
3656 SwapEndianContainer *c = opaque;
3657 c->write[1](c->opaque, addr, bswap16(val));
3660 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3661 uint32_t val)
3663 SwapEndianContainer *c = opaque;
3664 c->write[2](c->opaque, addr, bswap32(val));
3667 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3668 swapendian_mem_writeb,
3669 swapendian_mem_writew,
3670 swapendian_mem_writel
3673 static void swapendian_init(int io_index)
3675 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3676 int i;
3678 /* Swap mmio for big endian targets */
3679 c->opaque = io_mem_opaque[io_index];
3680 for (i = 0; i < 3; i++) {
3681 c->read[i] = io_mem_read[io_index][i];
3682 c->write[i] = io_mem_write[io_index][i];
3684 io_mem_read[io_index][i] = swapendian_readfn[i];
3685 io_mem_write[io_index][i] = swapendian_writefn[i];
3687 io_mem_opaque[io_index] = c;
3690 static void swapendian_del(int io_index)
3692 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3693 qemu_free(io_mem_opaque[io_index]);
3697 /* mem_read and mem_write are arrays of functions containing the
3698 function to access byte (index 0), word (index 1) and dword (index
3699 2). Functions can be omitted with a NULL function pointer.
3700 If io_index is non zero, the corresponding io zone is
3701 modified. If it is zero, a new io zone is allocated. The return
3702 value can be used with cpu_register_physical_memory(). (-1) is
3703 returned if error. */
3704 static int cpu_register_io_memory_fixed(int io_index,
3705 CPUReadMemoryFunc * const *mem_read,
3706 CPUWriteMemoryFunc * const *mem_write,
3707 void *opaque, enum device_endian endian)
3709 int i;
3711 if (io_index <= 0) {
3712 io_index = get_free_io_mem_idx();
3713 if (io_index == -1)
3714 return io_index;
3715 } else {
3716 io_index >>= IO_MEM_SHIFT;
3717 if (io_index >= IO_MEM_NB_ENTRIES)
3718 return -1;
3721 for (i = 0; i < 3; ++i) {
3722 io_mem_read[io_index][i]
3723 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3725 for (i = 0; i < 3; ++i) {
3726 io_mem_write[io_index][i]
3727 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3729 io_mem_opaque[io_index] = opaque;
3731 switch (endian) {
3732 case DEVICE_BIG_ENDIAN:
3733 #ifndef TARGET_WORDS_BIGENDIAN
3734 swapendian_init(io_index);
3735 #endif
3736 break;
3737 case DEVICE_LITTLE_ENDIAN:
3738 #ifdef TARGET_WORDS_BIGENDIAN
3739 swapendian_init(io_index);
3740 #endif
3741 break;
3742 case DEVICE_NATIVE_ENDIAN:
3743 default:
3744 break;
3747 return (io_index << IO_MEM_SHIFT);
3750 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3751 CPUWriteMemoryFunc * const *mem_write,
3752 void *opaque, enum device_endian endian)
3754 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3757 void cpu_unregister_io_memory(int io_table_address)
3759 int i;
3760 int io_index = io_table_address >> IO_MEM_SHIFT;
3762 swapendian_del(io_index);
3764 for (i=0;i < 3; i++) {
3765 io_mem_read[io_index][i] = unassigned_mem_read[i];
3766 io_mem_write[io_index][i] = unassigned_mem_write[i];
3768 io_mem_opaque[io_index] = NULL;
3769 io_mem_used[io_index] = 0;
3772 static void io_mem_init(void)
3774 int i;
3776 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3777 unassigned_mem_write, NULL,
3778 DEVICE_NATIVE_ENDIAN);
3779 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3780 unassigned_mem_write, NULL,
3781 DEVICE_NATIVE_ENDIAN);
3782 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3783 notdirty_mem_write, NULL,
3784 DEVICE_NATIVE_ENDIAN);
3785 for (i=0; i<5; i++)
3786 io_mem_used[i] = 1;
3788 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3789 watch_mem_write, NULL,
3790 DEVICE_NATIVE_ENDIAN);
3793 #endif /* !defined(CONFIG_USER_ONLY) */
3795 /* physical memory access (slow version, mainly for debug) */
3796 #if defined(CONFIG_USER_ONLY)
3797 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3798 uint8_t *buf, int len, int is_write)
3800 int l, flags;
3801 target_ulong page;
3802 void * p;
3804 while (len > 0) {
3805 page = addr & TARGET_PAGE_MASK;
3806 l = (page + TARGET_PAGE_SIZE) - addr;
3807 if (l > len)
3808 l = len;
3809 flags = page_get_flags(page);
3810 if (!(flags & PAGE_VALID))
3811 return -1;
3812 if (is_write) {
3813 if (!(flags & PAGE_WRITE))
3814 return -1;
3815 /* XXX: this code should not depend on lock_user */
3816 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3817 return -1;
3818 memcpy(p, buf, l);
3819 unlock_user(p, addr, l);
3820 } else {
3821 if (!(flags & PAGE_READ))
3822 return -1;
3823 /* XXX: this code should not depend on lock_user */
3824 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3825 return -1;
3826 memcpy(buf, p, l);
3827 unlock_user(p, addr, 0);
3829 len -= l;
3830 buf += l;
3831 addr += l;
3833 return 0;
3836 #else
3837 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3838 int len, int is_write)
3840 int l, io_index;
3841 uint8_t *ptr;
3842 uint32_t val;
3843 target_phys_addr_t page;
3844 unsigned long pd;
3845 PhysPageDesc *p;
3847 while (len > 0) {
3848 page = addr & TARGET_PAGE_MASK;
3849 l = (page + TARGET_PAGE_SIZE) - addr;
3850 if (l > len)
3851 l = len;
3852 p = phys_page_find(page >> TARGET_PAGE_BITS);
3853 if (!p) {
3854 pd = IO_MEM_UNASSIGNED;
3855 } else {
3856 pd = p->phys_offset;
3859 if (is_write) {
3860 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3861 target_phys_addr_t addr1 = addr;
3862 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3863 if (p)
3864 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3865 /* XXX: could force cpu_single_env to NULL to avoid
3866 potential bugs */
3867 if (l >= 4 && ((addr1 & 3) == 0)) {
3868 /* 32 bit write access */
3869 val = ldl_p(buf);
3870 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3871 l = 4;
3872 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3873 /* 16 bit write access */
3874 val = lduw_p(buf);
3875 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3876 l = 2;
3877 } else {
3878 /* 8 bit write access */
3879 val = ldub_p(buf);
3880 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3881 l = 1;
3883 } else {
3884 unsigned long addr1;
3885 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3886 /* RAM case */
3887 ptr = qemu_get_ram_ptr(addr1);
3888 memcpy(ptr, buf, l);
3889 if (!cpu_physical_memory_is_dirty(addr1)) {
3890 /* invalidate code */
3891 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3892 /* set dirty bit */
3893 cpu_physical_memory_set_dirty_flags(
3894 addr1, (0xff & ~CODE_DIRTY_FLAG));
3896 qemu_put_ram_ptr(ptr);
3898 } else {
3899 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3900 !(pd & IO_MEM_ROMD)) {
3901 target_phys_addr_t addr1 = addr;
3902 /* I/O case */
3903 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3904 if (p)
3905 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3906 if (l >= 4 && ((addr1 & 3) == 0)) {
3907 /* 32 bit read access */
3908 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3909 stl_p(buf, val);
3910 l = 4;
3911 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3912 /* 16 bit read access */
3913 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3914 stw_p(buf, val);
3915 l = 2;
3916 } else {
3917 /* 8 bit read access */
3918 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3919 stb_p(buf, val);
3920 l = 1;
3922 } else {
3923 /* RAM case */
3924 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3925 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3926 qemu_put_ram_ptr(ptr);
3929 len -= l;
3930 buf += l;
3931 addr += l;
3935 /* used for ROM loading : can write in RAM and ROM */
3936 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3937 const uint8_t *buf, int len)
3939 int l;
3940 uint8_t *ptr;
3941 target_phys_addr_t page;
3942 unsigned long pd;
3943 PhysPageDesc *p;
3945 while (len > 0) {
3946 page = addr & TARGET_PAGE_MASK;
3947 l = (page + TARGET_PAGE_SIZE) - addr;
3948 if (l > len)
3949 l = len;
3950 p = phys_page_find(page >> TARGET_PAGE_BITS);
3951 if (!p) {
3952 pd = IO_MEM_UNASSIGNED;
3953 } else {
3954 pd = p->phys_offset;
3957 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3958 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3959 !(pd & IO_MEM_ROMD)) {
3960 /* do nothing */
3961 } else {
3962 unsigned long addr1;
3963 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3964 /* ROM/RAM case */
3965 ptr = qemu_get_ram_ptr(addr1);
3966 memcpy(ptr, buf, l);
3967 qemu_put_ram_ptr(ptr);
3969 len -= l;
3970 buf += l;
3971 addr += l;
3975 typedef struct {
3976 void *buffer;
3977 target_phys_addr_t addr;
3978 target_phys_addr_t len;
3979 } BounceBuffer;
3981 static BounceBuffer bounce;
3983 typedef struct MapClient {
3984 void *opaque;
3985 void (*callback)(void *opaque);
3986 QLIST_ENTRY(MapClient) link;
3987 } MapClient;
3989 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3990 = QLIST_HEAD_INITIALIZER(map_client_list);
3992 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3994 MapClient *client = qemu_malloc(sizeof(*client));
3996 client->opaque = opaque;
3997 client->callback = callback;
3998 QLIST_INSERT_HEAD(&map_client_list, client, link);
3999 return client;
4002 void cpu_unregister_map_client(void *_client)
4004 MapClient *client = (MapClient *)_client;
4006 QLIST_REMOVE(client, link);
4007 qemu_free(client);
4010 static void cpu_notify_map_clients(void)
4012 MapClient *client;
4014 while (!QLIST_EMPTY(&map_client_list)) {
4015 client = QLIST_FIRST(&map_client_list);
4016 client->callback(client->opaque);
4017 cpu_unregister_map_client(client);
4021 /* Map a physical memory region into a host virtual address.
4022 * May map a subset of the requested range, given by and returned in *plen.
4023 * May return NULL if resources needed to perform the mapping are exhausted.
4024 * Use only for reads OR writes - not for read-modify-write operations.
4025 * Use cpu_register_map_client() to know when retrying the map operation is
4026 * likely to succeed.
4028 void *cpu_physical_memory_map(target_phys_addr_t addr,
4029 target_phys_addr_t *plen,
4030 int is_write)
4032 target_phys_addr_t len = *plen;
4033 target_phys_addr_t done = 0;
4034 int l;
4035 uint8_t *ret = NULL;
4036 uint8_t *ptr;
4037 target_phys_addr_t page;
4038 unsigned long pd;
4039 PhysPageDesc *p;
4040 unsigned long addr1;
4042 while (len > 0) {
4043 page = addr & TARGET_PAGE_MASK;
4044 l = (page + TARGET_PAGE_SIZE) - addr;
4045 if (l > len)
4046 l = len;
4047 p = phys_page_find(page >> TARGET_PAGE_BITS);
4048 if (!p) {
4049 pd = IO_MEM_UNASSIGNED;
4050 } else {
4051 pd = p->phys_offset;
4054 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4055 if (done || bounce.buffer) {
4056 break;
4058 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4059 bounce.addr = addr;
4060 bounce.len = l;
4061 if (!is_write) {
4062 cpu_physical_memory_read(addr, bounce.buffer, l);
4064 ptr = bounce.buffer;
4065 } else {
4066 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4067 ptr = qemu_get_ram_ptr(addr1);
4069 if (!done) {
4070 ret = ptr;
4071 } else if (ret + done != ptr) {
4072 break;
4075 len -= l;
4076 addr += l;
4077 done += l;
4079 *plen = done;
4080 return ret;
4083 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4084 * Will also mark the memory as dirty if is_write == 1. access_len gives
4085 * the amount of memory that was actually read or written by the caller.
4087 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4088 int is_write, target_phys_addr_t access_len)
4090 if (buffer != bounce.buffer) {
4091 if (is_write) {
4092 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4093 while (access_len) {
4094 unsigned l;
4095 l = TARGET_PAGE_SIZE;
4096 if (l > access_len)
4097 l = access_len;
4098 if (!cpu_physical_memory_is_dirty(addr1)) {
4099 /* invalidate code */
4100 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4101 /* set dirty bit */
4102 cpu_physical_memory_set_dirty_flags(
4103 addr1, (0xff & ~CODE_DIRTY_FLAG));
4105 addr1 += l;
4106 access_len -= l;
4109 if (xen_mapcache_enabled()) {
4110 uint8_t *buffer1 = buffer;
4111 uint8_t *end_buffer = buffer + len;
4113 while (buffer1 < end_buffer) {
4114 qemu_put_ram_ptr(buffer1);
4115 buffer1 += TARGET_PAGE_SIZE;
4118 return;
4120 if (is_write) {
4121 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4123 qemu_vfree(bounce.buffer);
4124 bounce.buffer = NULL;
4125 cpu_notify_map_clients();
4128 /* warning: addr must be aligned */
4129 uint32_t ldl_phys(target_phys_addr_t addr)
4131 int io_index;
4132 uint8_t *ptr;
4133 uint32_t val;
4134 unsigned long pd;
4135 PhysPageDesc *p;
4137 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4138 if (!p) {
4139 pd = IO_MEM_UNASSIGNED;
4140 } else {
4141 pd = p->phys_offset;
4144 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4145 !(pd & IO_MEM_ROMD)) {
4146 /* I/O case */
4147 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4148 if (p)
4149 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4150 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4151 } else {
4152 /* RAM case */
4153 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4154 (addr & ~TARGET_PAGE_MASK);
4155 val = ldl_p(ptr);
4157 return val;
4160 /* warning: addr must be aligned */
4161 uint64_t ldq_phys(target_phys_addr_t addr)
4163 int io_index;
4164 uint8_t *ptr;
4165 uint64_t val;
4166 unsigned long pd;
4167 PhysPageDesc *p;
4169 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4170 if (!p) {
4171 pd = IO_MEM_UNASSIGNED;
4172 } else {
4173 pd = p->phys_offset;
4176 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4177 !(pd & IO_MEM_ROMD)) {
4178 /* I/O case */
4179 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4180 if (p)
4181 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4182 #ifdef TARGET_WORDS_BIGENDIAN
4183 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4184 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4185 #else
4186 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4187 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4188 #endif
4189 } else {
4190 /* RAM case */
4191 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4192 (addr & ~TARGET_PAGE_MASK);
4193 val = ldq_p(ptr);
4195 return val;
4198 /* XXX: optimize */
4199 uint32_t ldub_phys(target_phys_addr_t addr)
4201 uint8_t val;
4202 cpu_physical_memory_read(addr, &val, 1);
4203 return val;
4206 /* warning: addr must be aligned */
4207 uint32_t lduw_phys(target_phys_addr_t addr)
4209 int io_index;
4210 uint8_t *ptr;
4211 uint64_t val;
4212 unsigned long pd;
4213 PhysPageDesc *p;
4215 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4216 if (!p) {
4217 pd = IO_MEM_UNASSIGNED;
4218 } else {
4219 pd = p->phys_offset;
4222 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4223 !(pd & IO_MEM_ROMD)) {
4224 /* I/O case */
4225 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4226 if (p)
4227 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4228 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4229 } else {
4230 /* RAM case */
4231 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4232 (addr & ~TARGET_PAGE_MASK);
4233 val = lduw_p(ptr);
4235 return val;
4238 /* warning: addr must be aligned. The ram page is not masked as dirty
4239 and the code inside is not invalidated. It is useful if the dirty
4240 bits are used to track modified PTEs */
4241 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4243 int io_index;
4244 uint8_t *ptr;
4245 unsigned long pd;
4246 PhysPageDesc *p;
4248 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4249 if (!p) {
4250 pd = IO_MEM_UNASSIGNED;
4251 } else {
4252 pd = p->phys_offset;
4255 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4256 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4257 if (p)
4258 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4259 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4260 } else {
4261 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4262 ptr = qemu_get_ram_ptr(addr1);
4263 stl_p(ptr, val);
4265 if (unlikely(in_migration)) {
4266 if (!cpu_physical_memory_is_dirty(addr1)) {
4267 /* invalidate code */
4268 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4269 /* set dirty bit */
4270 cpu_physical_memory_set_dirty_flags(
4271 addr1, (0xff & ~CODE_DIRTY_FLAG));
4277 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4279 int io_index;
4280 uint8_t *ptr;
4281 unsigned long pd;
4282 PhysPageDesc *p;
4284 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4285 if (!p) {
4286 pd = IO_MEM_UNASSIGNED;
4287 } else {
4288 pd = p->phys_offset;
4291 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4292 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4293 if (p)
4294 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4295 #ifdef TARGET_WORDS_BIGENDIAN
4296 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4297 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4298 #else
4299 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4300 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4301 #endif
4302 } else {
4303 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4304 (addr & ~TARGET_PAGE_MASK);
4305 stq_p(ptr, val);
4309 /* warning: addr must be aligned */
4310 void stl_phys(target_phys_addr_t addr, uint32_t val)
4312 int io_index;
4313 uint8_t *ptr;
4314 unsigned long pd;
4315 PhysPageDesc *p;
4317 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4318 if (!p) {
4319 pd = IO_MEM_UNASSIGNED;
4320 } else {
4321 pd = p->phys_offset;
4324 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4325 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4326 if (p)
4327 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4328 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4329 } else {
4330 unsigned long addr1;
4331 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4332 /* RAM case */
4333 ptr = qemu_get_ram_ptr(addr1);
4334 stl_p(ptr, val);
4335 if (!cpu_physical_memory_is_dirty(addr1)) {
4336 /* invalidate code */
4337 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4338 /* set dirty bit */
4339 cpu_physical_memory_set_dirty_flags(addr1,
4340 (0xff & ~CODE_DIRTY_FLAG));
4345 /* XXX: optimize */
4346 void stb_phys(target_phys_addr_t addr, uint32_t val)
4348 uint8_t v = val;
4349 cpu_physical_memory_write(addr, &v, 1);
4352 /* warning: addr must be aligned */
4353 void stw_phys(target_phys_addr_t addr, uint32_t val)
4355 int io_index;
4356 uint8_t *ptr;
4357 unsigned long pd;
4358 PhysPageDesc *p;
4360 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4361 if (!p) {
4362 pd = IO_MEM_UNASSIGNED;
4363 } else {
4364 pd = p->phys_offset;
4367 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4368 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4369 if (p)
4370 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4371 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4372 } else {
4373 unsigned long addr1;
4374 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4375 /* RAM case */
4376 ptr = qemu_get_ram_ptr(addr1);
4377 stw_p(ptr, val);
4378 if (!cpu_physical_memory_is_dirty(addr1)) {
4379 /* invalidate code */
4380 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4381 /* set dirty bit */
4382 cpu_physical_memory_set_dirty_flags(addr1,
4383 (0xff & ~CODE_DIRTY_FLAG));
4388 /* XXX: optimize */
4389 void stq_phys(target_phys_addr_t addr, uint64_t val)
4391 val = tswap64(val);
4392 cpu_physical_memory_write(addr, &val, 8);
4395 /* virtual memory access for debug (includes writing to ROM) */
4396 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4397 uint8_t *buf, int len, int is_write)
4399 int l;
4400 target_phys_addr_t phys_addr;
4401 target_ulong page;
4403 while (len > 0) {
4404 page = addr & TARGET_PAGE_MASK;
4405 phys_addr = cpu_get_phys_page_debug(env, page);
4406 /* if no physical page mapped, return an error */
4407 if (phys_addr == -1)
4408 return -1;
4409 l = (page + TARGET_PAGE_SIZE) - addr;
4410 if (l > len)
4411 l = len;
4412 phys_addr += (addr & ~TARGET_PAGE_MASK);
4413 if (is_write)
4414 cpu_physical_memory_write_rom(phys_addr, buf, l);
4415 else
4416 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4417 len -= l;
4418 buf += l;
4419 addr += l;
4421 return 0;
4423 #endif
4425 /* in deterministic execution mode, instructions doing device I/Os
4426 must be at the end of the TB */
4427 void cpu_io_recompile(CPUState *env, void *retaddr)
4429 TranslationBlock *tb;
4430 uint32_t n, cflags;
4431 target_ulong pc, cs_base;
4432 uint64_t flags;
4434 tb = tb_find_pc((unsigned long)retaddr);
4435 if (!tb) {
4436 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4437 retaddr);
4439 n = env->icount_decr.u16.low + tb->icount;
4440 cpu_restore_state(tb, env, (unsigned long)retaddr);
4441 /* Calculate how many instructions had been executed before the fault
4442 occurred. */
4443 n = n - env->icount_decr.u16.low;
4444 /* Generate a new TB ending on the I/O insn. */
4445 n++;
4446 /* On MIPS and SH, delay slot instructions can only be restarted if
4447 they were already the first instruction in the TB. If this is not
4448 the first instruction in a TB then re-execute the preceding
4449 branch. */
4450 #if defined(TARGET_MIPS)
4451 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4452 env->active_tc.PC -= 4;
4453 env->icount_decr.u16.low++;
4454 env->hflags &= ~MIPS_HFLAG_BMASK;
4456 #elif defined(TARGET_SH4)
4457 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4458 && n > 1) {
4459 env->pc -= 2;
4460 env->icount_decr.u16.low++;
4461 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4463 #endif
4464 /* This should never happen. */
4465 if (n > CF_COUNT_MASK)
4466 cpu_abort(env, "TB too big during recompile");
4468 cflags = n | CF_LAST_IO;
4469 pc = tb->pc;
4470 cs_base = tb->cs_base;
4471 flags = tb->flags;
4472 tb_phys_invalidate(tb, -1);
4473 /* FIXME: In theory this could raise an exception. In practice
4474 we have already translated the block once so it's probably ok. */
4475 tb_gen_code(env, pc, cs_base, flags, cflags);
4476 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4477 the first in the TB) then we end up generating a whole new TB and
4478 repeating the fault, which is horribly inefficient.
4479 Better would be to execute just this insn uncached, or generate a
4480 second new TB. */
4481 cpu_resume_from_signal(env, NULL);
4484 #if !defined(CONFIG_USER_ONLY)
4486 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4488 int i, target_code_size, max_target_code_size;
4489 int direct_jmp_count, direct_jmp2_count, cross_page;
4490 TranslationBlock *tb;
4492 target_code_size = 0;
4493 max_target_code_size = 0;
4494 cross_page = 0;
4495 direct_jmp_count = 0;
4496 direct_jmp2_count = 0;
4497 for(i = 0; i < nb_tbs; i++) {
4498 tb = &tbs[i];
4499 target_code_size += tb->size;
4500 if (tb->size > max_target_code_size)
4501 max_target_code_size = tb->size;
4502 if (tb->page_addr[1] != -1)
4503 cross_page++;
4504 if (tb->tb_next_offset[0] != 0xffff) {
4505 direct_jmp_count++;
4506 if (tb->tb_next_offset[1] != 0xffff) {
4507 direct_jmp2_count++;
4511 /* XXX: avoid using doubles ? */
4512 cpu_fprintf(f, "Translation buffer state:\n");
4513 cpu_fprintf(f, "gen code size %td/%ld\n",
4514 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4515 cpu_fprintf(f, "TB count %d/%d\n",
4516 nb_tbs, code_gen_max_blocks);
4517 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4518 nb_tbs ? target_code_size / nb_tbs : 0,
4519 max_target_code_size);
4520 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4521 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4522 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4523 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4524 cross_page,
4525 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4526 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4527 direct_jmp_count,
4528 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4529 direct_jmp2_count,
4530 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4531 cpu_fprintf(f, "\nStatistics:\n");
4532 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4533 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4534 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4535 tcg_dump_info(f, cpu_fprintf);
4538 #define MMUSUFFIX _cmmu
4539 #define GETPC() NULL
4540 #define env cpu_single_env
4541 #define SOFTMMU_CODE_ACCESS
4543 #define SHIFT 0
4544 #include "softmmu_template.h"
4546 #define SHIFT 1
4547 #include "softmmu_template.h"
4549 #define SHIFT 2
4550 #include "softmmu_template.h"
4552 #define SHIFT 3
4553 #include "softmmu_template.h"
4555 #undef env
4557 #endif