1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "exec-all.h"
30 #include "cache-utils.h"
32 #if !defined(TARGET_IA64)
33 #include "tcg.h"
34 #endif
35 #include "qemu-kvm.h"
37 #include "hw/hw.h"
38 #include "hw/qdev.h"
39 #include "osdep.h"
40 #include "kvm.h"
41 #include "hw/xen.h"
42 #include "qemu-timer.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #include <signal.h>
46 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
47 #include <sys/param.h>
48 #if __FreeBSD_version >= 700104
49 #define HAVE_KINFO_GETVMMAP
50 #define sigqueue sigqueue_freebsd /* avoid redefinition */
51 #include <sys/time.h>
52 #include <sys/proc.h>
53 #include <machine/profile.h>
54 #define _KERNEL
55 #include <sys/user.h>
56 #undef _KERNEL
57 #undef sigqueue
58 #include <libutil.h>
59 #endif
60 #endif
61 #else /* !CONFIG_USER_ONLY */
62 #include "xen-mapcache.h"
63 #endif
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_TLB
68 //#define DEBUG_UNASSIGNED
70 /* make various TB consistency checks */
71 //#define DEBUG_TB_CHECK
72 //#define DEBUG_TLB_CHECK
74 //#define DEBUG_IOPORT
75 //#define DEBUG_SUBPAGE
77 #if !defined(CONFIG_USER_ONLY)
78 /* TB consistency checks only implemented for usermode emulation. */
79 #undef DEBUG_TB_CHECK
80 #endif
82 #define SMC_BITMAP_USE_THRESHOLD 10
84 static TranslationBlock *tbs;
85 static int code_gen_max_blocks;
86 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
87 static int nb_tbs;
88 /* any access to the tbs or the page table must use this lock */
89 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
91 #if defined(__arm__) || defined(__sparc_v9__)
92 /* The prologue must be reachable with a direct jump. ARM and Sparc64
93 have limited branch ranges (possibly also PPC) so place it in a
94 section close to code segment. */
95 #define code_gen_section \
96 __attribute__((__section__(".gen_code"))) \
97 __attribute__((aligned (32)))
98 #elif defined(_WIN32)
99 /* Maximum alignment for Win32 is 16. */
100 #define code_gen_section \
101 __attribute__((aligned (16)))
102 #else
103 #define code_gen_section \
104 __attribute__((aligned (32)))
105 #endif
107 uint8_t code_gen_prologue[1024] code_gen_section;
108 static uint8_t *code_gen_buffer;
109 static unsigned long code_gen_buffer_size;
110 /* threshold to flush the translated code buffer */
111 static unsigned long code_gen_buffer_max_size;
112 static uint8_t *code_gen_ptr;
114 #if !defined(CONFIG_USER_ONLY)
115 int phys_ram_fd;
116 static int in_migration;
118 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
119 #endif
121 CPUState *first_cpu;
122 /* current CPU in the current thread. It is only valid inside
123 cpu_exec() */
124 CPUState *cpu_single_env;
125 /* 0 = Do not count executed instructions.
126 1 = Precise instruction counting.
127 2 = Adaptive rate instruction counting. */
128 int use_icount = 0;
129 /* Current instruction counter. While executing translated code this may
130 include some instructions that have not yet been executed. */
131 int64_t qemu_icount;
133 typedef struct PageDesc {
134 /* list of TBs intersecting this ram page */
135 TranslationBlock *first_tb;
136 /* in order to optimize self-modifying code, we count the number
137 of code-invalidating writes to a given page before switching to a bitmap */
138 unsigned int code_write_count;
139 uint8_t *code_bitmap;
140 #if defined(CONFIG_USER_ONLY)
141 unsigned long flags;
142 #endif
143 } PageDesc;
145 /* In system mode we want L1_MAP to be based on ram offsets,
146 while in user mode we want it to be based on virtual addresses. */
147 #if !defined(CONFIG_USER_ONLY)
148 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
149 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
150 #else
151 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
152 #endif
153 #else
154 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
155 #endif
157 /* Size of the L2 (and L3, etc) page tables. */
158 #define L2_BITS 10
159 #define L2_SIZE (1 << L2_BITS)
161 /* The bits remaining after N lower levels of page tables. */
162 #define P_L1_BITS_REM \
163 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
164 #define V_L1_BITS_REM \
165 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
167 /* Size of the L1 page table. Avoid silly small sizes. */
168 #if P_L1_BITS_REM < 4
169 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
170 #else
171 #define P_L1_BITS P_L1_BITS_REM
172 #endif
174 #if V_L1_BITS_REM < 4
175 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
176 #else
177 #define V_L1_BITS V_L1_BITS_REM
178 #endif
180 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
181 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
183 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
184 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
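/* Worked example (illustrative only, not part of the original source): on a
   64-bit host with 4 KB target pages (TARGET_PAGE_BITS = 12), assuming
   L1_MAP_ADDR_SPACE_BITS works out to 64, the definitions above give:

       V_L1_BITS_REM = (64 - 12) % 10 = 2      (too small, so widen)
       V_L1_BITS     = 2 + 10       = 12
       V_L1_SIZE     = 1 << 12      = 4096 entries in l1_map
       V_L1_SHIFT    = 64 - 12 - 12 = 40

   The page_find_alloc() walk below therefore visits V_L1_SHIFT / L2_BITS = 4
   levels under the L1 table, each indexed by 10 bits, so 12 + 4*10 index bits
   plus 12 page-offset bits cover the whole 64-bit space.  The numbers are
   hypothetical and change with the host/target configuration. */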
186 unsigned long qemu_real_host_page_size;
187 unsigned long qemu_host_page_bits;
188 unsigned long qemu_host_page_size;
189 unsigned long qemu_host_page_mask;
191 /* This is a multi-level map on the virtual address space.
192 The bottom level has pointers to PageDesc. */
193 static void *l1_map[V_L1_SIZE];
195 #if !defined(CONFIG_USER_ONLY)
196 typedef struct PhysPageDesc {
197 /* offset in host memory of the page + io_index in the low bits */
198 ram_addr_t phys_offset;
199 ram_addr_t region_offset;
200 } PhysPageDesc;
202 /* This is a multi-level map on the physical address space.
203 The bottom level has pointers to PhysPageDesc. */
204 static void *l1_phys_map[P_L1_SIZE];
206 static void io_mem_init(void);
208 /* io memory support */
209 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
210 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
211 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
212 static char io_mem_used[IO_MEM_NB_ENTRIES];
213 static int io_mem_watch;
214 #endif
216 /* log support */
217 #ifdef WIN32
218 static const char *logfilename = "qemu.log";
219 #else
220 static const char *logfilename = "/tmp/qemu.log";
221 #endif
222 FILE *logfile;
223 int loglevel;
224 static int log_append = 0;
226 /* statistics */
227 #if !defined(CONFIG_USER_ONLY)
228 static int tlb_flush_count;
229 #endif
230 static int tb_flush_count;
231 static int tb_phys_invalidate_count;
233 #ifdef _WIN32
234 static void map_exec(void *addr, long size)
236 DWORD old_protect;
237 VirtualProtect(addr, size,
238 PAGE_EXECUTE_READWRITE, &old_protect);
241 #else
242 static void map_exec(void *addr, long size)
244 unsigned long start, end, page_size;
246 page_size = getpagesize();
247 start = (unsigned long)addr;
248 start &= ~(page_size - 1);
250 end = (unsigned long)addr + size;
251 end += page_size - 1;
252 end &= ~(page_size - 1);
254 mprotect((void *)start, end - start,
255 PROT_READ | PROT_WRITE | PROT_EXEC);
257 #endif
259 static void page_init(void)
261 /* NOTE: we can always assume that qemu_host_page_size >=
262 TARGET_PAGE_SIZE */
263 #ifdef _WIN32
265 SYSTEM_INFO system_info;
267 GetSystemInfo(&system_info);
268 qemu_real_host_page_size = system_info.dwPageSize;
270 #else
271 qemu_real_host_page_size = getpagesize();
272 #endif
273 if (qemu_host_page_size == 0)
274 qemu_host_page_size = qemu_real_host_page_size;
275 if (qemu_host_page_size < TARGET_PAGE_SIZE)
276 qemu_host_page_size = TARGET_PAGE_SIZE;
277 qemu_host_page_bits = 0;
278 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
279 qemu_host_page_bits++;
280 qemu_host_page_mask = ~(qemu_host_page_size - 1);
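/* Example of the rounding above (hypothetical values): with a 4 KB host page,
   qemu_real_host_page_size = 0x1000, so qemu_host_page_bits ends up as 12 and
   qemu_host_page_mask as ~0xfffUL.  If the target page were larger (say 8 KB),
   qemu_host_page_size would be bumped up to match it. */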
282 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
284 #ifdef HAVE_KINFO_GETVMMAP
285 struct kinfo_vmentry *freep;
286 int i, cnt;
288 freep = kinfo_getvmmap(getpid(), &cnt);
289 if (freep) {
290 mmap_lock();
291 for (i = 0; i < cnt; i++) {
292 unsigned long startaddr, endaddr;
294 startaddr = freep[i].kve_start;
295 endaddr = freep[i].kve_end;
296 if (h2g_valid(startaddr)) {
297 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
299 if (h2g_valid(endaddr)) {
300 endaddr = h2g(endaddr);
301 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
302 } else {
303 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
304 endaddr = ~0ul;
305 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
306 #endif
310 free(freep);
311 mmap_unlock();
313 #else
314 FILE *f;
316 last_brk = (unsigned long)sbrk(0);
318 f = fopen("/compat/linux/proc/self/maps", "r");
319 if (f) {
320 mmap_lock();
322 do {
323 unsigned long startaddr, endaddr;
324 int n;
326 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
328 if (n == 2 && h2g_valid(startaddr)) {
329 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
331 if (h2g_valid(endaddr)) {
332 endaddr = h2g(endaddr);
333 } else {
334 endaddr = ~0ul;
336 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
338 } while (!feof(f));
340 fclose(f);
341 mmap_unlock();
343 #endif
345 #endif
348 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
350 PageDesc *pd;
351 void **lp;
352 int i;
354 #if defined(CONFIG_USER_ONLY)
355 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
356 # define ALLOC(P, SIZE) \
357 do { \
358 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
359 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
360 } while (0)
361 #else
362 # define ALLOC(P, SIZE) \
363 do { P = qemu_mallocz(SIZE); } while (0)
364 #endif
366 /* Level 1. Always allocated. */
367 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
369 /* Level 2..N-1. */
370 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
371 void **p = *lp;
373 if (p == NULL) {
374 if (!alloc) {
375 return NULL;
377 ALLOC(p, sizeof(void *) * L2_SIZE);
378 *lp = p;
381 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
384 pd = *lp;
385 if (pd == NULL) {
386 if (!alloc) {
387 return NULL;
389 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
390 *lp = pd;
393 #undef ALLOC
395 return pd + (index & (L2_SIZE - 1));
398 static inline PageDesc *page_find(tb_page_addr_t index)
400 return page_find_alloc(index, 0);
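/* Typical use (sketch only): callers index the map by target page number,
   as the invalidation paths further down do, e.g.

       PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);
       if (p && p->first_tb) {
           ... some TB intersects this page ...
       }

   page_find() never allocates; page_find_alloc(index, 1) is used on the
   paths that must create the entry. */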
403 #if !defined(CONFIG_USER_ONLY)
404 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
406 PhysPageDesc *pd;
407 void **lp;
408 int i;
410 /* Level 1. Always allocated. */
411 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
413 /* Level 2..N-1. */
414 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
415 void **p = *lp;
416 if (p == NULL) {
417 if (!alloc) {
418 return NULL;
420 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
422 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
425 pd = *lp;
426 if (pd == NULL) {
427 int i;
429 if (!alloc) {
430 return NULL;
433 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
435 for (i = 0; i < L2_SIZE; i++) {
436 pd[i].phys_offset = IO_MEM_UNASSIGNED;
437 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
441 return pd + (index & (L2_SIZE - 1));
444 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
446 return phys_page_find_alloc(index, 0);
449 static void tlb_protect_code(ram_addr_t ram_addr);
450 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
451 target_ulong vaddr);
452 #define mmap_lock() do { } while(0)
453 #define mmap_unlock() do { } while(0)
454 #endif
456 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
458 #if defined(CONFIG_USER_ONLY)
459 /* Currently it is not recommended to allocate big chunks of data in
460 user mode. This will change when a dedicated libc is used. */
461 #define USE_STATIC_CODE_GEN_BUFFER
462 #endif
464 #ifdef USE_STATIC_CODE_GEN_BUFFER
465 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
466 __attribute__((aligned (CODE_GEN_ALIGN)));
467 #endif
469 static void code_gen_alloc(unsigned long tb_size)
471 if (kvm_enabled())
472 return;
474 #ifdef USE_STATIC_CODE_GEN_BUFFER
475 code_gen_buffer = static_code_gen_buffer;
476 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
477 map_exec(code_gen_buffer, code_gen_buffer_size);
478 #else
479 code_gen_buffer_size = tb_size;
480 if (code_gen_buffer_size == 0) {
481 #if defined(CONFIG_USER_ONLY)
482 /* in user mode, phys_ram_size is not meaningful */
483 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
484 #else
485 /* XXX: needs adjustments */
486 code_gen_buffer_size = (unsigned long)(ram_size / 4);
487 #endif
489 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
490 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
491 /* The code gen buffer location may have constraints depending on
492 the host cpu and OS */
493 #if defined(__linux__)
495 int flags;
496 void *start = NULL;
498 flags = MAP_PRIVATE | MAP_ANONYMOUS;
499 #if defined(__x86_64__)
500 flags |= MAP_32BIT;
501 /* Cannot map more than that */
502 if (code_gen_buffer_size > (800 * 1024 * 1024))
503 code_gen_buffer_size = (800 * 1024 * 1024);
504 #elif defined(__sparc_v9__)
505 // Map the buffer below 2G, so we can use direct calls and branches
506 flags |= MAP_FIXED;
507 start = (void *) 0x60000000UL;
508 if (code_gen_buffer_size > (512 * 1024 * 1024))
509 code_gen_buffer_size = (512 * 1024 * 1024);
510 #elif defined(__arm__)
511 /* Map the buffer below 32M, so we can use direct calls and branches */
512 flags |= MAP_FIXED;
513 start = (void *) 0x01000000UL;
514 if (code_gen_buffer_size > 16 * 1024 * 1024)
515 code_gen_buffer_size = 16 * 1024 * 1024;
516 #elif defined(__s390x__)
517 /* Map the buffer so that we can use direct calls and branches. */
518 /* We have a +- 4GB range on the branches; leave some slop. */
519 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
520 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
522 start = (void *)0x90000000UL;
523 #endif
524 code_gen_buffer = mmap(start, code_gen_buffer_size,
525 PROT_WRITE | PROT_READ | PROT_EXEC,
526 flags, -1, 0);
527 if (code_gen_buffer == MAP_FAILED) {
528 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
529 exit(1);
532 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
533 || defined(__DragonFly__) || defined(__OpenBSD__)
535 int flags;
536 void *addr = NULL;
537 flags = MAP_PRIVATE | MAP_ANONYMOUS;
538 #if defined(__x86_64__)
539 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
540 * 0x40000000 is free */
541 flags |= MAP_FIXED;
542 addr = (void *)0x40000000;
543 /* Cannot map more than that */
544 if (code_gen_buffer_size > (800 * 1024 * 1024))
545 code_gen_buffer_size = (800 * 1024 * 1024);
546 #elif defined(__sparc_v9__)
547 // Map the buffer below 2G, so we can use direct calls and branches
548 flags |= MAP_FIXED;
549 addr = (void *) 0x60000000UL;
550 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
551 code_gen_buffer_size = (512 * 1024 * 1024);
553 #endif
554 code_gen_buffer = mmap(addr, code_gen_buffer_size,
555 PROT_WRITE | PROT_READ | PROT_EXEC,
556 flags, -1, 0);
557 if (code_gen_buffer == MAP_FAILED) {
558 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
559 exit(1);
562 #else
563 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
564 map_exec(code_gen_buffer, code_gen_buffer_size);
565 #endif
566 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
567 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
568 code_gen_buffer_max_size = code_gen_buffer_size -
569 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
570 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
571 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
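/* Rough sizing example for the calculations above (illustrative; the
   constants are host/target dependent): with the 32 MB
   DEFAULT_CODE_GEN_BUFFER_SIZE, code_gen_buffer_max_size is 32 MB minus one
   maximal TB's worth of output (TCG_MAX_OP_SIZE * OPC_MAX_SIZE bytes of
   slack), and code_gen_max_blocks is 32 MB / CODE_GEN_AVG_BLOCK_SIZE
   TranslationBlock descriptors, so the tbs array scales with the buffer. */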
574 /* Must be called before using the QEMU cpus. 'tb_size' is the size
575 (in bytes) allocated to the translation buffer. Zero means default
576 size. */
577 void cpu_exec_init_all(unsigned long tb_size)
579 cpu_gen_init();
580 code_gen_alloc(tb_size);
581 code_gen_ptr = code_gen_buffer;
582 page_init();
583 #if !defined(CONFIG_USER_ONLY)
584 io_mem_init();
585 #endif
586 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
587 /* There's no guest base to take into account, so go ahead and
588 initialize the prologue now. */
589 tcg_prologue_init(&tcg_ctx);
590 #endif
593 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
595 static int cpu_common_post_load(void *opaque, int version_id)
597 CPUState *env = opaque;
599 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
600 version_id is increased. */
601 env->interrupt_request &= ~0x01;
602 tlb_flush(env, 1);
604 return 0;
607 static const VMStateDescription vmstate_cpu_common = {
608 .name = "cpu_common",
609 .version_id = 1,
610 .minimum_version_id = 1,
611 .minimum_version_id_old = 1,
612 .post_load = cpu_common_post_load,
613 .fields = (VMStateField []) {
614 VMSTATE_UINT32(halted, CPUState),
615 VMSTATE_UINT32(interrupt_request, CPUState),
616 VMSTATE_END_OF_LIST()
619 #endif
621 CPUState *qemu_get_cpu(int cpu)
623 CPUState *env = first_cpu;
625 while (env) {
626 if (env->cpu_index == cpu)
627 break;
628 env = env->next_cpu;
631 return env;
634 void cpu_exec_init(CPUState *env)
636 CPUState **penv;
637 int cpu_index;
639 #if defined(CONFIG_USER_ONLY)
640 cpu_list_lock();
641 #endif
642 env->next_cpu = NULL;
643 penv = &first_cpu;
644 cpu_index = 0;
645 while (*penv != NULL) {
646 penv = &(*penv)->next_cpu;
647 cpu_index++;
649 env->cpu_index = cpu_index;
650 env->numa_node = 0;
651 QTAILQ_INIT(&env->breakpoints);
652 QTAILQ_INIT(&env->watchpoints);
653 #ifndef CONFIG_USER_ONLY
654 env->thread_id = qemu_get_thread_id();
655 #endif
656 *penv = env;
657 #if defined(CONFIG_USER_ONLY)
658 cpu_list_unlock();
659 #endif
660 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
661 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
662 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
663 cpu_save, cpu_load, env);
664 #endif
667 /* Allocate a new translation block. Flush the translation buffer if
668 too many translation blocks or too much generated code. */
669 static TranslationBlock *tb_alloc(target_ulong pc)
671 TranslationBlock *tb;
673 if (nb_tbs >= code_gen_max_blocks ||
674 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
675 return NULL;
676 tb = &tbs[nb_tbs++];
677 tb->pc = pc;
678 tb->cflags = 0;
679 return tb;
682 void tb_free(TranslationBlock *tb)
684 /* In practice this is mostly used for single-use temporary TBs.
685 Ignore the hard cases and just back up if this TB happens to
686 be the last one generated. */
687 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
688 code_gen_ptr = tb->tc_ptr;
689 nb_tbs--;
693 static inline void invalidate_page_bitmap(PageDesc *p)
695 if (p->code_bitmap) {
696 qemu_free(p->code_bitmap);
697 p->code_bitmap = NULL;
699 p->code_write_count = 0;
702 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
704 static void page_flush_tb_1 (int level, void **lp)
706 int i;
708 if (*lp == NULL) {
709 return;
711 if (level == 0) {
712 PageDesc *pd = *lp;
713 for (i = 0; i < L2_SIZE; ++i) {
714 pd[i].first_tb = NULL;
715 invalidate_page_bitmap(pd + i);
717 } else {
718 void **pp = *lp;
719 for (i = 0; i < L2_SIZE; ++i) {
720 page_flush_tb_1 (level - 1, pp + i);
725 static void page_flush_tb(void)
727 int i;
728 for (i = 0; i < V_L1_SIZE; i++) {
729 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
733 /* flush all the translation blocks */
734 /* XXX: tb_flush is currently not thread safe */
735 void tb_flush(CPUState *env1)
737 CPUState *env;
738 #if defined(DEBUG_FLUSH)
739 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
740 (unsigned long)(code_gen_ptr - code_gen_buffer),
741 nb_tbs, nb_tbs > 0 ?
742 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
743 #endif
744 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
745 cpu_abort(env1, "Internal error: code buffer overflow\n");
747 nb_tbs = 0;
749 for(env = first_cpu; env != NULL; env = env->next_cpu) {
750 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
753 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
754 page_flush_tb();
756 code_gen_ptr = code_gen_buffer;
757 /* XXX: flush processor icache at this point if cache flush is
758 expensive */
759 tb_flush_count++;
762 #ifdef DEBUG_TB_CHECK
764 static void tb_invalidate_check(target_ulong address)
766 TranslationBlock *tb;
767 int i;
768 address &= TARGET_PAGE_MASK;
769 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
770 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
771 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
772 address >= tb->pc + tb->size)) {
773 printf("ERROR invalidate: address=" TARGET_FMT_lx
774 " PC=%08lx size=%04x\n",
775 address, (long)tb->pc, tb->size);
781 /* verify that all the pages have correct rights for code */
782 static void tb_page_check(void)
784 TranslationBlock *tb;
785 int i, flags1, flags2;
787 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
788 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
789 flags1 = page_get_flags(tb->pc);
790 flags2 = page_get_flags(tb->pc + tb->size - 1);
791 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
792 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
793 (long)tb->pc, tb->size, flags1, flags2);
799 #endif
801 /* invalidate one TB */
802 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
803 int next_offset)
805 TranslationBlock *tb1;
806 for(;;) {
807 tb1 = *ptb;
808 if (tb1 == tb) {
809 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
810 break;
812 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
816 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
818 TranslationBlock *tb1;
819 unsigned int n1;
821 for(;;) {
822 tb1 = *ptb;
823 n1 = (long)tb1 & 3;
824 tb1 = (TranslationBlock *)((long)tb1 & ~3);
825 if (tb1 == tb) {
826 *ptb = tb1->page_next[n1];
827 break;
829 ptb = &tb1->page_next[n1];
833 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
835 TranslationBlock *tb1, **ptb;
836 unsigned int n1;
838 ptb = &tb->jmp_next[n];
839 tb1 = *ptb;
840 if (tb1) {
841 /* find tb(n) in circular list */
842 for(;;) {
843 tb1 = *ptb;
844 n1 = (long)tb1 & 3;
845 tb1 = (TranslationBlock *)((long)tb1 & ~3);
846 if (n1 == n && tb1 == tb)
847 break;
848 if (n1 == 2) {
849 ptb = &tb1->jmp_first;
850 } else {
851 ptb = &tb1->jmp_next[n1];
854 /* now we can remove tb(n) from the list */
855 *ptb = tb->jmp_next[n];
857 tb->jmp_next[n] = NULL;
861 /* reset the jump entry 'n' of a TB so that it is not chained to
862 another TB */
863 static inline void tb_reset_jump(TranslationBlock *tb, int n)
865 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
868 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
870 CPUState *env;
871 PageDesc *p;
872 unsigned int h, n1;
873 tb_page_addr_t phys_pc;
874 TranslationBlock *tb1, *tb2;
876 /* remove the TB from the hash list */
877 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
878 h = tb_phys_hash_func(phys_pc);
879 tb_remove(&tb_phys_hash[h], tb,
880 offsetof(TranslationBlock, phys_hash_next));
882 /* remove the TB from the page list */
883 if (tb->page_addr[0] != page_addr) {
884 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
885 tb_page_remove(&p->first_tb, tb);
886 invalidate_page_bitmap(p);
888 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
889 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
890 tb_page_remove(&p->first_tb, tb);
891 invalidate_page_bitmap(p);
894 tb_invalidated_flag = 1;
896 /* remove the TB from the hash list */
897 h = tb_jmp_cache_hash_func(tb->pc);
898 for(env = first_cpu; env != NULL; env = env->next_cpu) {
899 if (env->tb_jmp_cache[h] == tb)
900 env->tb_jmp_cache[h] = NULL;
903 /* remove this TB from the two jump lists */
904 tb_jmp_remove(tb, 0);
905 tb_jmp_remove(tb, 1);
907 /* suppress any remaining jumps to this TB */
908 tb1 = tb->jmp_first;
909 for(;;) {
910 n1 = (long)tb1 & 3;
911 if (n1 == 2)
912 break;
913 tb1 = (TranslationBlock *)((long)tb1 & ~3);
914 tb2 = tb1->jmp_next[n1];
915 tb_reset_jump(tb1, n1);
916 tb1->jmp_next[n1] = NULL;
917 tb1 = tb2;
919 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
921 tb_phys_invalidate_count++;
924 static inline void set_bits(uint8_t *tab, int start, int len)
926 int end, mask, end1;
928 end = start + len;
929 tab += start >> 3;
930 mask = 0xff << (start & 7);
931 if ((start & ~7) == (end & ~7)) {
932 if (start < end) {
933 mask &= ~(0xff << (end & 7));
934 *tab |= mask;
936 } else {
937 *tab++ |= mask;
938 start = (start + 8) & ~7;
939 end1 = end & ~7;
940 while (start < end1) {
941 *tab++ = 0xff;
942 start += 8;
944 if (start < end) {
945 mask = ~(0xff << (end & 7));
946 *tab |= mask;
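/* Worked example (illustrative): set_bits(tab, 5, 7) marks bits 5..11.
   The first byte gets mask 0xff << 5 = 0xe0 (bits 5-7), there is no full
   middle byte, and the second byte gets ~(0xff << (12 & 7)) = 0x0f
   (bits 8-11).  build_page_bitmap() below relies on this to mark which
   parts of a page are covered by translated code. */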
951 static void build_page_bitmap(PageDesc *p)
953 int n, tb_start, tb_end;
954 TranslationBlock *tb;
956 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
958 tb = p->first_tb;
959 while (tb != NULL) {
960 n = (long)tb & 3;
961 tb = (TranslationBlock *)((long)tb & ~3);
962 /* NOTE: this is subtle as a TB may span two physical pages */
963 if (n == 0) {
964 /* NOTE: tb_end may be after the end of the page, but
965 it is not a problem */
966 tb_start = tb->pc & ~TARGET_PAGE_MASK;
967 tb_end = tb_start + tb->size;
968 if (tb_end > TARGET_PAGE_SIZE)
969 tb_end = TARGET_PAGE_SIZE;
970 } else {
971 tb_start = 0;
972 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
974 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
975 tb = tb->page_next[n];
979 TranslationBlock *tb_gen_code(CPUState *env,
980 target_ulong pc, target_ulong cs_base,
981 int flags, int cflags)
983 TranslationBlock *tb;
984 uint8_t *tc_ptr;
985 tb_page_addr_t phys_pc, phys_page2;
986 target_ulong virt_page2;
987 int code_gen_size;
989 phys_pc = get_page_addr_code(env, pc);
990 tb = tb_alloc(pc);
991 if (!tb) {
992 /* flush must be done */
993 tb_flush(env);
994 /* cannot fail at this point */
995 tb = tb_alloc(pc);
996 /* Don't forget to invalidate previous TB info. */
997 tb_invalidated_flag = 1;
999 tc_ptr = code_gen_ptr;
1000 tb->tc_ptr = tc_ptr;
1001 tb->cs_base = cs_base;
1002 tb->flags = flags;
1003 tb->cflags = cflags;
1004 cpu_gen_code(env, tb, &code_gen_size);
1005 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1007 /* check next page if needed */
1008 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1009 phys_page2 = -1;
1010 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1011 phys_page2 = get_page_addr_code(env, virt_page2);
1013 tb_link_page(tb, phys_pc, phys_page2);
1014 return tb;
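/* Example of the cross-page check in tb_gen_code() above (hypothetical
   addresses, 4 KB pages): a TB starting at pc = 0x1000ff0 with size 0x20 has
   its last byte at 0x100100f, so virt_page2 = 0x1001000 differs from the
   starting page and phys_page2 is resolved too; tb_link_page() then registers
   the TB on both physical pages. */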
1017 /* invalidate all TBs which intersect with the target physical page
1018 starting in range [start;end[. NOTE: start and end must refer to
1019 the same physical page. 'is_cpu_write_access' should be true if called
1020 from a real cpu write access: the virtual CPU will exit the current
1021 TB if code is modified inside this TB. */
1022 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1023 int is_cpu_write_access)
1025 TranslationBlock *tb, *tb_next, *saved_tb;
1026 CPUState *env = cpu_single_env;
1027 tb_page_addr_t tb_start, tb_end;
1028 PageDesc *p;
1029 int n;
1030 #ifdef TARGET_HAS_PRECISE_SMC
1031 int current_tb_not_found = is_cpu_write_access;
1032 TranslationBlock *current_tb = NULL;
1033 int current_tb_modified = 0;
1034 target_ulong current_pc = 0;
1035 target_ulong current_cs_base = 0;
1036 int current_flags = 0;
1037 #endif /* TARGET_HAS_PRECISE_SMC */
1039 p = page_find(start >> TARGET_PAGE_BITS);
1040 if (!p)
1041 return;
1042 if (!p->code_bitmap &&
1043 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1044 is_cpu_write_access) {
1045 /* build code bitmap */
1046 build_page_bitmap(p);
1049 /* we remove all the TBs in the range [start, end[ */
1050 /* XXX: see if in some cases it could be faster to invalidate all the code */
1051 tb = p->first_tb;
1052 while (tb != NULL) {
1053 n = (long)tb & 3;
1054 tb = (TranslationBlock *)((long)tb & ~3);
1055 tb_next = tb->page_next[n];
1056 /* NOTE: this is subtle as a TB may span two physical pages */
1057 if (n == 0) {
1058 /* NOTE: tb_end may be after the end of the page, but
1059 it is not a problem */
1060 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1061 tb_end = tb_start + tb->size;
1062 } else {
1063 tb_start = tb->page_addr[1];
1064 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1066 if (!(tb_end <= start || tb_start >= end)) {
1067 #ifdef TARGET_HAS_PRECISE_SMC
1068 if (current_tb_not_found) {
1069 current_tb_not_found = 0;
1070 current_tb = NULL;
1071 if (env->mem_io_pc) {
1072 /* now we have a real cpu fault */
1073 current_tb = tb_find_pc(env->mem_io_pc);
1076 if (current_tb == tb &&
1077 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1078 /* If we are modifying the current TB, we must stop
1079 its execution. We could be more precise by checking
1080 that the modification is after the current PC, but it
1081 would require a specialized function to partially
1082 restore the CPU state */
1084 current_tb_modified = 1;
1085 cpu_restore_state(current_tb, env, env->mem_io_pc);
1086 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1087 &current_flags);
1089 #endif /* TARGET_HAS_PRECISE_SMC */
1090 /* we need to do that to handle the case where a signal
1091 occurs while doing tb_phys_invalidate() */
1092 saved_tb = NULL;
1093 if (env) {
1094 saved_tb = env->current_tb;
1095 env->current_tb = NULL;
1097 tb_phys_invalidate(tb, -1);
1098 if (env) {
1099 env->current_tb = saved_tb;
1100 if (env->interrupt_request && env->current_tb)
1101 cpu_interrupt(env, env->interrupt_request);
1104 tb = tb_next;
1106 #if !defined(CONFIG_USER_ONLY)
1107 /* if no code remaining, no need to continue to use slow writes */
1108 if (!p->first_tb) {
1109 invalidate_page_bitmap(p);
1110 if (is_cpu_write_access) {
1111 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1114 #endif
1115 #ifdef TARGET_HAS_PRECISE_SMC
1116 if (current_tb_modified) {
1117 /* we generate a block containing just the instruction
1118 modifying the memory. It will ensure that it cannot modify
1119 itself */
1120 env->current_tb = NULL;
1121 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1122 cpu_resume_from_signal(env, NULL);
1124 #endif
1127 /* len must be <= 8 and start must be a multiple of len */
1128 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1130 PageDesc *p;
1131 int offset, b;
1132 #if 0
1133 if (1) {
1134 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1135 cpu_single_env->mem_io_vaddr, len,
1136 cpu_single_env->eip,
1137 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1139 #endif
1140 p = page_find(start >> TARGET_PAGE_BITS);
1141 if (!p)
1142 return;
1143 if (p->code_bitmap) {
1144 offset = start & ~TARGET_PAGE_MASK;
1145 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1146 if (b & ((1 << len) - 1))
1147 goto do_invalidate;
1148 } else {
1149 do_invalidate:
1150 tb_invalidate_phys_page_range(start, start + len, 1);
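/* Example of the bitmap test above (hypothetical write, 4 KB pages): for a
   4-byte store at page offset 0x24, offset >> 3 selects bitmap byte 4 and
   (offset & 7) = 4 shifts it down, so b & ((1 << 4) - 1) tests the bits for
   offsets 0x24..0x27.  Only if one of those bytes is covered by translated
   code do we fall through to the full (slow) range invalidation. */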
1154 #if !defined(CONFIG_SOFTMMU)
1155 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1156 unsigned long pc, void *puc)
1158 TranslationBlock *tb;
1159 PageDesc *p;
1160 int n;
1161 #ifdef TARGET_HAS_PRECISE_SMC
1162 TranslationBlock *current_tb = NULL;
1163 CPUState *env = cpu_single_env;
1164 int current_tb_modified = 0;
1165 target_ulong current_pc = 0;
1166 target_ulong current_cs_base = 0;
1167 int current_flags = 0;
1168 #endif
1170 addr &= TARGET_PAGE_MASK;
1171 p = page_find(addr >> TARGET_PAGE_BITS);
1172 if (!p)
1173 return;
1174 tb = p->first_tb;
1175 #ifdef TARGET_HAS_PRECISE_SMC
1176 if (tb && pc != 0) {
1177 current_tb = tb_find_pc(pc);
1179 #endif
1180 while (tb != NULL) {
1181 n = (long)tb & 3;
1182 tb = (TranslationBlock *)((long)tb & ~3);
1183 #ifdef TARGET_HAS_PRECISE_SMC
1184 if (current_tb == tb &&
1185 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1186 /* If we are modifying the current TB, we must stop
1187 its execution. We could be more precise by checking
1188 that the modification is after the current PC, but it
1189 would require a specialized function to partially
1190 restore the CPU state */
1192 current_tb_modified = 1;
1193 cpu_restore_state(current_tb, env, pc);
1194 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1195 &current_flags);
1197 #endif /* TARGET_HAS_PRECISE_SMC */
1198 tb_phys_invalidate(tb, addr);
1199 tb = tb->page_next[n];
1201 p->first_tb = NULL;
1202 #ifdef TARGET_HAS_PRECISE_SMC
1203 if (current_tb_modified) {
1204 /* we generate a block containing just the instruction
1205 modifying the memory. It will ensure that it cannot modify
1206 itself */
1207 env->current_tb = NULL;
1208 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1209 cpu_resume_from_signal(env, puc);
1211 #endif
1213 #endif
1215 /* add the tb in the target page and protect it if necessary */
1216 static inline void tb_alloc_page(TranslationBlock *tb,
1217 unsigned int n, tb_page_addr_t page_addr)
1219 PageDesc *p;
1220 TranslationBlock *last_first_tb;
1222 tb->page_addr[n] = page_addr;
1223 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1224 tb->page_next[n] = p->first_tb;
1225 last_first_tb = p->first_tb;
1226 p->first_tb = (TranslationBlock *)((long)tb | n);
1227 invalidate_page_bitmap(p);
1229 #if defined(TARGET_HAS_SMC) || 1
1231 #if defined(CONFIG_USER_ONLY)
1232 if (p->flags & PAGE_WRITE) {
1233 target_ulong addr;
1234 PageDesc *p2;
1235 int prot;
1237 /* force the host page as non writable (writes will have a
1238 page fault + mprotect overhead) */
1239 page_addr &= qemu_host_page_mask;
1240 prot = 0;
1241 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1242 addr += TARGET_PAGE_SIZE) {
1244 p2 = page_find (addr >> TARGET_PAGE_BITS);
1245 if (!p2)
1246 continue;
1247 prot |= p2->flags;
1248 p2->flags &= ~PAGE_WRITE;
1250 mprotect(g2h(page_addr), qemu_host_page_size,
1251 (prot & PAGE_BITS) & ~PAGE_WRITE);
1252 #ifdef DEBUG_TB_INVALIDATE
1253 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1254 page_addr);
1255 #endif
1257 #else
1258 /* if some code is already present, then the pages are already
1259 protected. So we handle the case where only the first TB is
1260 allocated in a physical page */
1261 if (!last_first_tb) {
1262 tlb_protect_code(page_addr);
1264 #endif
1266 #endif /* TARGET_HAS_SMC */
1269 /* add a new TB and link it to the physical page tables. phys_page2 is
1270 (-1) to indicate that only one page contains the TB. */
1271 void tb_link_page(TranslationBlock *tb,
1272 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1274 unsigned int h;
1275 TranslationBlock **ptb;
1277 /* Grab the mmap lock to stop another thread invalidating this TB
1278 before we are done. */
1279 mmap_lock();
1280 /* add in the physical hash table */
1281 h = tb_phys_hash_func(phys_pc);
1282 ptb = &tb_phys_hash[h];
1283 tb->phys_hash_next = *ptb;
1284 *ptb = tb;
1286 /* add in the page list */
1287 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1288 if (phys_page2 != -1)
1289 tb_alloc_page(tb, 1, phys_page2);
1290 else
1291 tb->page_addr[1] = -1;
1293 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1294 tb->jmp_next[0] = NULL;
1295 tb->jmp_next[1] = NULL;
1297 /* init original jump addresses */
1298 if (tb->tb_next_offset[0] != 0xffff)
1299 tb_reset_jump(tb, 0);
1300 if (tb->tb_next_offset[1] != 0xffff)
1301 tb_reset_jump(tb, 1);
1303 #ifdef DEBUG_TB_CHECK
1304 tb_page_check();
1305 #endif
1306 mmap_unlock();
1309 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1310 tb[1].tc_ptr. Return NULL if not found */
1311 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1313 int m_min, m_max, m;
1314 unsigned long v;
1315 TranslationBlock *tb;
1317 if (nb_tbs <= 0)
1318 return NULL;
1319 if (tc_ptr < (unsigned long)code_gen_buffer ||
1320 tc_ptr >= (unsigned long)code_gen_ptr)
1321 return NULL;
1322 /* binary search (cf Knuth) */
1323 m_min = 0;
1324 m_max = nb_tbs - 1;
1325 while (m_min <= m_max) {
1326 m = (m_min + m_max) >> 1;
1327 tb = &tbs[m];
1328 v = (unsigned long)tb->tc_ptr;
1329 if (v == tc_ptr)
1330 return tb;
1331 else if (tc_ptr < v) {
1332 m_max = m - 1;
1333 } else {
1334 m_min = m + 1;
1337 return &tbs[m_max];
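/* Note on the search above: the tbs array is filled in code_gen_ptr order, so
   tc_ptr increases with the index.  When no exact match is found, m_max is
   left on the TB with the largest tc_ptr below the search value, which is the
   block containing tc_ptr since generated code is laid out contiguously. */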
1340 static void tb_reset_jump_recursive(TranslationBlock *tb);
1342 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1344 TranslationBlock *tb1, *tb_next, **ptb;
1345 unsigned int n1;
1347 tb1 = tb->jmp_next[n];
1348 if (tb1 != NULL) {
1349 /* find head of list */
1350 for(;;) {
1351 n1 = (long)tb1 & 3;
1352 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1353 if (n1 == 2)
1354 break;
1355 tb1 = tb1->jmp_next[n1];
1357 /* we are now sure that tb jumps to tb1 */
1358 tb_next = tb1;
1360 /* remove tb from the jmp_first list */
1361 ptb = &tb_next->jmp_first;
1362 for(;;) {
1363 tb1 = *ptb;
1364 n1 = (long)tb1 & 3;
1365 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1366 if (n1 == n && tb1 == tb)
1367 break;
1368 ptb = &tb1->jmp_next[n1];
1370 *ptb = tb->jmp_next[n];
1371 tb->jmp_next[n] = NULL;
1373 /* suppress the jump to next tb in generated code */
1374 tb_reset_jump(tb, n);
1376 /* also reset the jumps in the tb we could have jumped to */
1377 tb_reset_jump_recursive(tb_next);
1381 static void tb_reset_jump_recursive(TranslationBlock *tb)
1383 tb_reset_jump_recursive2(tb, 0);
1384 tb_reset_jump_recursive2(tb, 1);
1387 #if defined(TARGET_HAS_ICE)
1388 #if defined(CONFIG_USER_ONLY)
1389 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1391 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1393 #else
1394 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1396 target_phys_addr_t addr;
1397 target_ulong pd;
1398 ram_addr_t ram_addr;
1399 PhysPageDesc *p;
1401 addr = cpu_get_phys_page_debug(env, pc);
1402 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1403 if (!p) {
1404 pd = IO_MEM_UNASSIGNED;
1405 } else {
1406 pd = p->phys_offset;
1408 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1409 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1411 #endif
1412 #endif /* TARGET_HAS_ICE */
1414 #if defined(CONFIG_USER_ONLY)
1415 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1420 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1421 int flags, CPUWatchpoint **watchpoint)
1423 return -ENOSYS;
1425 #else
1426 /* Add a watchpoint. */
1427 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1428 int flags, CPUWatchpoint **watchpoint)
1430 target_ulong len_mask = ~(len - 1);
1431 CPUWatchpoint *wp;
1433 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1434 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1435 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1436 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1437 return -EINVAL;
1439 wp = qemu_malloc(sizeof(*wp));
1441 wp->vaddr = addr;
1442 wp->len_mask = len_mask;
1443 wp->flags = flags;
1445 /* keep all GDB-injected watchpoints in front */
1446 if (flags & BP_GDB)
1447 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1448 else
1449 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1451 tlb_flush_page(env, addr);
1453 if (watchpoint)
1454 *watchpoint = wp;
1455 return 0;
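/* Example for the alignment check above (hypothetical): a 4-byte watchpoint
   at 0x1000 passes (len_mask = ~3 and addr & ~len_mask == 0), while one at
   0x1002 is rejected because it straddles the natural 4-byte alignment. */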
1458 /* Remove a specific watchpoint. */
1459 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1460 int flags)
1462 target_ulong len_mask = ~(len - 1);
1463 CPUWatchpoint *wp;
1465 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1466 if (addr == wp->vaddr && len_mask == wp->len_mask
1467 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1468 cpu_watchpoint_remove_by_ref(env, wp);
1469 return 0;
1472 return -ENOENT;
1475 /* Remove a specific watchpoint by reference. */
1476 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1478 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1480 tlb_flush_page(env, watchpoint->vaddr);
1482 qemu_free(watchpoint);
1485 /* Remove all matching watchpoints. */
1486 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1488 CPUWatchpoint *wp, *next;
1490 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1491 if (wp->flags & mask)
1492 cpu_watchpoint_remove_by_ref(env, wp);
1495 #endif
1497 /* Add a breakpoint. */
1498 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1499 CPUBreakpoint **breakpoint)
1501 #if defined(TARGET_HAS_ICE)
1502 CPUBreakpoint *bp;
1504 bp = qemu_malloc(sizeof(*bp));
1506 bp->pc = pc;
1507 bp->flags = flags;
1509 /* keep all GDB-injected breakpoints in front */
1510 if (flags & BP_GDB)
1511 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1512 else
1513 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1515 breakpoint_invalidate(env, pc);
1517 if (breakpoint)
1518 *breakpoint = bp;
1519 return 0;
1520 #else
1521 return -ENOSYS;
1522 #endif
1525 /* Remove a specific breakpoint. */
1526 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1528 #if defined(TARGET_HAS_ICE)
1529 CPUBreakpoint *bp;
1531 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1532 if (bp->pc == pc && bp->flags == flags) {
1533 cpu_breakpoint_remove_by_ref(env, bp);
1534 return 0;
1537 return -ENOENT;
1538 #else
1539 return -ENOSYS;
1540 #endif
1543 /* Remove a specific breakpoint by reference. */
1544 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1546 #if defined(TARGET_HAS_ICE)
1547 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1549 breakpoint_invalidate(env, breakpoint->pc);
1551 qemu_free(breakpoint);
1552 #endif
1555 /* Remove all matching breakpoints. */
1556 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1558 #if defined(TARGET_HAS_ICE)
1559 CPUBreakpoint *bp, *next;
1561 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1562 if (bp->flags & mask)
1563 cpu_breakpoint_remove_by_ref(env, bp);
1565 #endif
1568 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1569 CPU loop after each instruction */
1570 void cpu_single_step(CPUState *env, int enabled)
1572 #if defined(TARGET_HAS_ICE)
1573 if (env->singlestep_enabled != enabled) {
1574 env->singlestep_enabled = enabled;
1575 if (kvm_enabled())
1576 kvm_update_guest_debug(env, 0);
1577 else {
1578 /* must flush all the translated code to avoid inconsistencies */
1579 /* XXX: only flush what is necessary */
1580 tb_flush(env);
1583 #endif
1586 /* enable or disable low-level logging */
1587 void cpu_set_log(int log_flags)
1589 loglevel = log_flags;
1590 if (loglevel && !logfile) {
1591 logfile = fopen(logfilename, log_append ? "a" : "w");
1592 if (!logfile) {
1593 perror(logfilename);
1594 _exit(1);
1596 #if !defined(CONFIG_SOFTMMU)
1597 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1599 static char logfile_buf[4096];
1600 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1602 #elif !defined(_WIN32)
1603 /* Win32 doesn't support line-buffering and requires size >= 2 */
1604 setvbuf(logfile, NULL, _IOLBF, 0);
1605 #endif
1606 log_append = 1;
1608 if (!loglevel && logfile) {
1609 fclose(logfile);
1610 logfile = NULL;
1614 void cpu_set_log_filename(const char *filename)
1616 logfilename = strdup(filename);
1617 if (logfile) {
1618 fclose(logfile);
1619 logfile = NULL;
1621 cpu_set_log(loglevel);
1624 static void cpu_unlink_tb(CPUState *env)
1626 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1627 problem and hope the cpu will stop of its own accord. For userspace
1628 emulation this often isn't actually as bad as it sounds. Often
1629 signals are used primarily to interrupt blocking syscalls. */
1630 TranslationBlock *tb;
1631 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1633 spin_lock(&interrupt_lock);
1634 tb = env->current_tb;
1635 /* if the cpu is currently executing code, we must unlink it and
1636 all the potentially executing TB */
1637 if (tb) {
1638 env->current_tb = NULL;
1639 tb_reset_jump_recursive(tb);
1641 spin_unlock(&interrupt_lock);
1644 #ifndef CONFIG_USER_ONLY
1645 /* mask must never be zero, except for A20 change call */
1646 static void tcg_handle_interrupt(CPUState *env, int mask)
1648 int old_mask;
1650 old_mask = env->interrupt_request;
1651 env->interrupt_request |= mask;
1652 if (kvm_enabled() && !kvm_irqchip_in_kernel())
1653 kvm_update_interrupt_request(env);
1656 * If called from iothread context, wake the target cpu in
1657 * case it's halted.
1659 if (!qemu_cpu_is_self(env)) {
1660 qemu_cpu_kick(env);
1661 return;
1664 if (use_icount) {
1665 env->icount_decr.u16.high = 0xffff;
1666 if (!can_do_io(env)
1667 && (mask & ~old_mask) != 0) {
1668 cpu_abort(env, "Raised interrupt while not in I/O function");
1670 } else {
1671 cpu_unlink_tb(env);
1675 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1677 #else /* CONFIG_USER_ONLY */
1679 void cpu_interrupt(CPUState *env, int mask)
1681 env->interrupt_request |= mask;
1682 cpu_unlink_tb(env);
1684 #endif /* CONFIG_USER_ONLY */
1686 void cpu_reset_interrupt(CPUState *env, int mask)
1688 env->interrupt_request &= ~mask;
1691 void cpu_exit(CPUState *env)
1693 env->exit_request = 1;
1694 cpu_unlink_tb(env);
1697 const CPULogItem cpu_log_items[] = {
1698 { CPU_LOG_TB_OUT_ASM, "out_asm",
1699 "show generated host assembly code for each compiled TB" },
1700 { CPU_LOG_TB_IN_ASM, "in_asm",
1701 "show target assembly code for each compiled TB" },
1702 { CPU_LOG_TB_OP, "op",
1703 "show micro ops for each compiled TB" },
1704 { CPU_LOG_TB_OP_OPT, "op_opt",
1705 "show micro ops "
1706 #ifdef TARGET_I386
1707 "before eflags optimization and "
1708 #endif
1709 "after liveness analysis" },
1710 { CPU_LOG_INT, "int",
1711 "show interrupts/exceptions in short format" },
1712 { CPU_LOG_EXEC, "exec",
1713 "show trace before each executed TB (lots of logs)" },
1714 { CPU_LOG_TB_CPU, "cpu",
1715 "show CPU state before block translation" },
1716 #ifdef TARGET_I386
1717 { CPU_LOG_PCALL, "pcall",
1718 "show protected mode far calls/returns/exceptions" },
1719 { CPU_LOG_RESET, "cpu_reset",
1720 "show CPU state before CPU resets" },
1721 #endif
1722 #ifdef DEBUG_IOPORT
1723 { CPU_LOG_IOPORT, "ioport",
1724 "show all i/o ports accesses" },
1725 #endif
1726 { 0, NULL, NULL },
1729 #ifndef CONFIG_USER_ONLY
1730 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1731 = QLIST_HEAD_INITIALIZER(memory_client_list);
1733 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1734 ram_addr_t size,
1735 ram_addr_t phys_offset,
1736 bool log_dirty)
1738 CPUPhysMemoryClient *client;
1739 QLIST_FOREACH(client, &memory_client_list, list) {
1740 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1744 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1745 target_phys_addr_t end)
1747 CPUPhysMemoryClient *client;
1748 QLIST_FOREACH(client, &memory_client_list, list) {
1749 int r = client->sync_dirty_bitmap(client, start, end);
1750 if (r < 0)
1751 return r;
1753 return 0;
1756 static int cpu_notify_migration_log(int enable)
1758 CPUPhysMemoryClient *client;
1759 QLIST_FOREACH(client, &memory_client_list, list) {
1760 int r = client->migration_log(client, enable);
1761 if (r < 0)
1762 return r;
1764 return 0;
1767 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1768 * address. Each intermediate table provides the next L2_BITs of guest
1769 physical address space. The number of levels varies based on host and
1770 * guest configuration, making it efficient to build the final guest
1771 * physical address by seeding the L1 offset and shifting and adding in
1772 * each L2 offset as we recurse through them. */
1773 static void phys_page_for_each_1(CPUPhysMemoryClient *client,
1774 int level, void **lp, target_phys_addr_t addr)
1776 int i;
1778 if (*lp == NULL) {
1779 return;
1781 if (level == 0) {
1782 PhysPageDesc *pd = *lp;
1783 addr <<= L2_BITS + TARGET_PAGE_BITS;
1784 for (i = 0; i < L2_SIZE; ++i) {
1785 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1786 client->set_memory(client, addr | i << TARGET_PAGE_BITS,
1787 TARGET_PAGE_SIZE, pd[i].phys_offset, false);
1790 } else {
1791 void **pp = *lp;
1792 for (i = 0; i < L2_SIZE; ++i) {
1793 phys_page_for_each_1(client, level - 1, pp + i,
1794 (addr << L2_BITS) | i);
1799 static void phys_page_for_each(CPUPhysMemoryClient *client)
1801 int i;
1802 for (i = 0; i < P_L1_SIZE; ++i) {
1803 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1804 l1_phys_map + i, i);
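/* Illustrative trace of the recursion above (hypothetical sizes): with
   L2_BITS = 10 and 4 KB pages, phys_page_for_each() seeds addr with the L1
   index i; each intermediate level does addr = (addr << 10) | i, and the leaf
   level shifts by 10 + 12 before OR-ing in (i << TARGET_PAGE_BITS), so the
   client callback receives the full guest-physical page address that the
   PhysPageDesc entry describes. */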
1808 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1810 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1811 phys_page_for_each(client);
1814 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1816 QLIST_REMOVE(client, list);
1818 #endif
1820 static int cmp1(const char *s1, int n, const char *s2)
1822 if (strlen(s2) != n)
1823 return 0;
1824 return memcmp(s1, s2, n) == 0;
1827 /* takes a comma-separated list of log masks. Returns 0 on error. */
1828 int cpu_str_to_log_mask(const char *str)
1830 const CPULogItem *item;
1831 int mask;
1832 const char *p, *p1;
1834 p = str;
1835 mask = 0;
1836 for(;;) {
1837 p1 = strchr(p, ',');
1838 if (!p1)
1839 p1 = p + strlen(p);
1840 if(cmp1(p,p1-p,"all")) {
1841 for(item = cpu_log_items; item->mask != 0; item++) {
1842 mask |= item->mask;
1844 } else {
1845 for(item = cpu_log_items; item->mask != 0; item++) {
1846 if (cmp1(p, p1 - p, item->name))
1847 goto found;
1849 return 0;
1851 found:
1852 mask |= item->mask;
1853 if (*p1 != ',')
1854 break;
1855 p = p1 + 1;
1857 return mask;
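/* Usage sketch (flag names taken from cpu_log_items above):

       cpu_str_to_log_mask("in_asm,cpu")

   returns CPU_LOG_TB_IN_ASM | CPU_LOG_TB_CPU, "all" selects every entry in
   the table, and an unknown name makes the whole call return 0. */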
1860 void cpu_abort(CPUState *env, const char *fmt, ...)
1862 va_list ap;
1863 va_list ap2;
1865 va_start(ap, fmt);
1866 va_copy(ap2, ap);
1867 fprintf(stderr, "qemu: fatal: ");
1868 vfprintf(stderr, fmt, ap);
1869 fprintf(stderr, "\n");
1870 #ifdef TARGET_I386
1871 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1872 #else
1873 cpu_dump_state(env, stderr, fprintf, 0);
1874 #endif
1875 if (qemu_log_enabled()) {
1876 qemu_log("qemu: fatal: ");
1877 qemu_log_vprintf(fmt, ap2);
1878 qemu_log("\n");
1879 #ifdef TARGET_I386
1880 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1881 #else
1882 log_cpu_state(env, 0);
1883 #endif
1884 qemu_log_flush();
1885 qemu_log_close();
1887 va_end(ap2);
1888 va_end(ap);
1889 #if defined(CONFIG_USER_ONLY)
1891 struct sigaction act;
1892 sigfillset(&act.sa_mask);
1893 act.sa_handler = SIG_DFL;
1894 sigaction(SIGABRT, &act, NULL);
1896 #endif
1897 abort();
1900 CPUState *cpu_copy(CPUState *env)
1902 CPUState *new_env = cpu_init(env->cpu_model_str);
1903 CPUState *next_cpu = new_env->next_cpu;
1904 int cpu_index = new_env->cpu_index;
1905 #if defined(TARGET_HAS_ICE)
1906 CPUBreakpoint *bp;
1907 CPUWatchpoint *wp;
1908 #endif
1910 memcpy(new_env, env, sizeof(CPUState));
1912 /* Preserve chaining and index. */
1913 new_env->next_cpu = next_cpu;
1914 new_env->cpu_index = cpu_index;
1916 /* Clone all break/watchpoints.
1917 Note: Once we support ptrace with hw-debug register access, make sure
1918 BP_CPU break/watchpoints are handled correctly on clone. */
1919 QTAILQ_INIT(&env->breakpoints);
1920 QTAILQ_INIT(&env->watchpoints);
1921 #if defined(TARGET_HAS_ICE)
1922 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1923 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1925 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1926 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1927 wp->flags, NULL);
1929 #endif
1931 return new_env;
1934 #if !defined(CONFIG_USER_ONLY)
1936 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1938 unsigned int i;
1940 /* Discard jump cache entries for any tb which might potentially
1941 overlap the flushed page. */
1942 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1943 memset (&env->tb_jmp_cache[i], 0,
1944 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1946 i = tb_jmp_cache_hash_page(addr);
1947 memset (&env->tb_jmp_cache[i], 0,
1948 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1951 static CPUTLBEntry s_cputlb_empty_entry = {
1952 .addr_read = -1,
1953 .addr_write = -1,
1954 .addr_code = -1,
1955 .addend = -1,
1958 /* NOTE: if flush_global is true, also flush global entries (not
1959 implemented yet) */
1960 void tlb_flush(CPUState *env, int flush_global)
1962 int i;
1964 #if defined(DEBUG_TLB)
1965 printf("tlb_flush:\n");
1966 #endif
1967 /* must reset current TB so that interrupts cannot modify the
1968 links while we are modifying them */
1969 env->current_tb = NULL;
1971 for(i = 0; i < CPU_TLB_SIZE; i++) {
1972 int mmu_idx;
1973 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1974 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1978 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1980 env->tlb_flush_addr = -1;
1981 env->tlb_flush_mask = 0;
1982 tlb_flush_count++;
1985 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1987 if (addr == (tlb_entry->addr_read &
1988 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1989 addr == (tlb_entry->addr_write &
1990 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1991 addr == (tlb_entry->addr_code &
1992 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1993 *tlb_entry = s_cputlb_empty_entry;
1997 void tlb_flush_page(CPUState *env, target_ulong addr)
1999 int i;
2000 int mmu_idx;
2002 #if defined(DEBUG_TLB)
2003 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2004 #endif
2005 /* Check if we need to flush due to large pages. */
2006 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2007 #if defined(DEBUG_TLB)
2008 printf("tlb_flush_page: forced full flush ("
2009 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2010 env->tlb_flush_addr, env->tlb_flush_mask);
2011 #endif
2012 tlb_flush(env, 1);
2013 return;
2015 /* must reset current TB so that interrupts cannot modify the
2016 links while we are modifying them */
2017 env->current_tb = NULL;
2019 addr &= TARGET_PAGE_MASK;
2020 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2021 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2022 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2024 tlb_flush_jmp_cache(env, addr);
2027 /* update the TLBs so that writes to code in the virtual page 'addr'
2028 can be detected */
2029 static void tlb_protect_code(ram_addr_t ram_addr)
2031 cpu_physical_memory_reset_dirty(ram_addr,
2032 ram_addr + TARGET_PAGE_SIZE,
2033 CODE_DIRTY_FLAG);
2036 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2037 tested for self-modifying code */
2038 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2039 target_ulong vaddr)
2041 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2044 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2045 unsigned long start, unsigned long length)
2047 unsigned long addr;
2048 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2049 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2050 if ((addr - start) < length) {
2051 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2056 /* Note: start and end must be within the same ram block. */
2057 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2058 int dirty_flags)
2060 CPUState *env;
2061 unsigned long length, start1;
2062 int i;
2064 start &= TARGET_PAGE_MASK;
2065 end = TARGET_PAGE_ALIGN(end);
2067 length = end - start;
2068 if (length == 0)
2069 return;
2070 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2072 /* we modify the TLB cache so that the dirty bit will be set again
2073 when accessing the range */
2074 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2075 /* Check that we don't span multiple blocks - this breaks the
2076 address comparisons below. */
2077 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2078 != (end - 1) - start) {
2079 abort();
2082 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2083 int mmu_idx;
2084 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2085 for(i = 0; i < CPU_TLB_SIZE; i++)
2086 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2087 start1, length);
2092 int cpu_physical_memory_set_dirty_tracking(int enable)
2094 int ret = 0;
2095 in_migration = enable;
2096 ret = cpu_notify_migration_log(!!enable);
2097 return ret;
2100 int cpu_physical_memory_get_dirty_tracking(void)
2102 return in_migration;
2105 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2106 target_phys_addr_t end_addr)
2108 int ret;
2110 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2111 return ret;
2114 int cpu_physical_log_start(target_phys_addr_t start_addr,
2115 ram_addr_t size)
2117 CPUPhysMemoryClient *client;
2118 QLIST_FOREACH(client, &memory_client_list, list) {
2119 if (client->log_start) {
2120 int r = client->log_start(client, start_addr, size);
2121 if (r < 0) {
2122 return r;
2126 return 0;
2129 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2130 ram_addr_t size)
2132 CPUPhysMemoryClient *client;
2133 QLIST_FOREACH(client, &memory_client_list, list) {
2134 if (client->log_stop) {
2135 int r = client->log_stop(client, start_addr, size);
2136 if (r < 0) {
2137 return r;
2141 return 0;
2144 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2146 ram_addr_t ram_addr;
2147 void *p;
2149 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2150 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2151 + tlb_entry->addend);
2152 ram_addr = qemu_ram_addr_from_host_nofail(p);
2153 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2154 tlb_entry->addr_write |= TLB_NOTDIRTY;
2159 /* update the TLB according to the current state of the dirty bits */
2160 void cpu_tlb_update_dirty(CPUState *env)
2162 int i;
2163 int mmu_idx;
2164 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2165 for(i = 0; i < CPU_TLB_SIZE; i++)
2166 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2170 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2172 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2173 tlb_entry->addr_write = vaddr;
2176 /* update the TLB corresponding to virtual page vaddr
2177 so that it is no longer dirty */
2178 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2180 int i;
2181 int mmu_idx;
2183 vaddr &= TARGET_PAGE_MASK;
2184 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2185 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2186 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2189 /* Our TLB does not support large pages, so remember the area covered by
2190 large pages and trigger a full TLB flush if these are invalidated. */
2191 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2192 target_ulong size)
2194 target_ulong mask = ~(size - 1);
2196 if (env->tlb_flush_addr == (target_ulong)-1) {
2197 env->tlb_flush_addr = vaddr & mask;
2198 env->tlb_flush_mask = mask;
2199 return;
2201 /* Extend the existing region to include the new page.
2202 This is a compromise between unnecessary flushes and the cost
2203 of maintaining a full variable size TLB. */
2204 mask &= env->tlb_flush_mask;
2205 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2206 mask <<= 1;
2208 env->tlb_flush_addr &= mask;
2209 env->tlb_flush_mask = mask;
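/* Worked example (illustrative, 32-bit target with 2 MB pages): the first
   large page at 0x00200000 records tlb_flush_addr = 0x00200000 and
   tlb_flush_mask = 0xffe00000.  Adding a second 2 MB page at 0x00800000
   widens the mask step by step (0xffc00000, 0xff800000, 0xff000000) until
   both pages share one region, leaving tlb_flush_addr = 0x00000000 and
   tlb_flush_mask = 0xff000000, i.e. a single 16 MB area that triggers the
   forced full flush in tlb_flush_page() above. */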
2212 /* Add a new TLB entry. At most one entry for a given virtual address
2213 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2214 supplied size is only used by tlb_flush_page. */
2215 void tlb_set_page(CPUState *env, target_ulong vaddr,
2216 target_phys_addr_t paddr, int prot,
2217 int mmu_idx, target_ulong size)
2219 PhysPageDesc *p;
2220 unsigned long pd;
2221 unsigned int index;
2222 target_ulong address;
2223 target_ulong code_address;
2224 unsigned long addend;
2225 CPUTLBEntry *te;
2226 CPUWatchpoint *wp;
2227 target_phys_addr_t iotlb;
2229 assert(size >= TARGET_PAGE_SIZE);
2230 if (size != TARGET_PAGE_SIZE) {
2231 tlb_add_large_page(env, vaddr, size);
2233 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2234 if (!p) {
2235 pd = IO_MEM_UNASSIGNED;
2236 } else {
2237 pd = p->phys_offset;
2239 #if defined(DEBUG_TLB)
2240 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2241 " prot=%x idx=%d pd=0x%08lx\n",
2242 vaddr, paddr, prot, mmu_idx, pd);
2243 #endif
2245 address = vaddr;
2246 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2247 /* IO memory case (romd handled later) */
2248 address |= TLB_MMIO;
2250 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2251 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2252 /* Normal RAM. */
2253 iotlb = pd & TARGET_PAGE_MASK;
2254 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2255 iotlb |= IO_MEM_NOTDIRTY;
2256 else
2257 iotlb |= IO_MEM_ROM;
2258 } else {
2259 /* IO handlers are currently passed a physical address.
2260 It would be nice to pass an offset from the base address
2261 of that region. This would avoid having to special case RAM,
2262 and avoid full address decoding in every device.
2263 We can't use the high bits of pd for this because
2264 IO_MEM_ROMD uses these as a ram address. */
2265 iotlb = (pd & ~TARGET_PAGE_MASK);
2266 if (p) {
2267 iotlb += p->region_offset;
2268 } else {
2269 iotlb += paddr;
2273 code_address = address;
2274 /* Make accesses to pages with watchpoints go via the
2275 watchpoint trap routines. */
2276 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2277 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2278 /* Avoid trapping reads of pages with a write breakpoint. */
2279 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2280 iotlb = io_mem_watch + paddr;
2281 address |= TLB_MMIO;
2282 break;
2287 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2288 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2289 te = &env->tlb_table[mmu_idx][index];
2290 te->addend = addend - vaddr;
2291 if (prot & PAGE_READ) {
2292 te->addr_read = address;
2293 } else {
2294 te->addr_read = -1;
2297 if (prot & PAGE_EXEC) {
2298 te->addr_code = code_address;
2299 } else {
2300 te->addr_code = -1;
2302 if (prot & PAGE_WRITE) {
2303 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2304 (pd & IO_MEM_ROMD)) {
2305 /* Write access calls the I/O callback. */
2306 te->addr_write = address | TLB_MMIO;
2307 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2308 !cpu_physical_memory_is_dirty(pd)) {
2309 te->addr_write = address | TLB_NOTDIRTY;
2310 } else {
2311 te->addr_write = address;
2313 } else {
2314 te->addr_write = -1;
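/* Illustrative sketch (kept under #if 0): a target's tlb_fill() handler
   typically ends up here after walking the guest page tables.  The helper
   get_physical_address() and the fault handling it implies are hypothetical
   placeholders for whatever the target actually implements. */
#if 0
static void example_tlb_fill(CPUState *env, target_ulong addr, int is_write,
                             int mmu_idx)
{
    target_phys_addr_t paddr;
    target_ulong page_size;
    int prot;

    /* hypothetical target-specific page table walk */
    if (get_physical_address(env, addr, is_write, mmu_idx,
                             &paddr, &prot, &page_size) == 0) {
        /* map exactly one guest page; page_size only feeds the large-page
           tracking done by tlb_add_large_page() above */
        tlb_set_page(env, addr & TARGET_PAGE_MASK, paddr & TARGET_PAGE_MASK,
                     prot, mmu_idx, page_size);
    }
}
#endif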
2318 #else
2320 void tlb_flush(CPUState *env, int flush_global)
2324 void tlb_flush_page(CPUState *env, target_ulong addr)
2329 * Walks guest process memory "regions" one by one
2330 * and calls callback function 'fn' for each region.
2333 struct walk_memory_regions_data
2335 walk_memory_regions_fn fn;
2336 void *priv;
2337 unsigned long start;
2338 int prot;
2341 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2342 abi_ulong end, int new_prot)
2344 if (data->start != -1ul) {
2345 int rc = data->fn(data->priv, data->start, end, data->prot);
2346 if (rc != 0) {
2347 return rc;
2351 data->start = (new_prot ? end : -1ul);
2352 data->prot = new_prot;
2354 return 0;
2357 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2358 abi_ulong base, int level, void **lp)
2360 abi_ulong pa;
2361 int i, rc;
2363 if (*lp == NULL) {
2364 return walk_memory_regions_end(data, base, 0);
2367 if (level == 0) {
2368 PageDesc *pd = *lp;
2369 for (i = 0; i < L2_SIZE; ++i) {
2370 int prot = pd[i].flags;
2372 pa = base | (i << TARGET_PAGE_BITS);
2373 if (prot != data->prot) {
2374 rc = walk_memory_regions_end(data, pa, prot);
2375 if (rc != 0) {
2376 return rc;
2380 } else {
2381 void **pp = *lp;
2382 for (i = 0; i < L2_SIZE; ++i) {
2383 pa = base | ((abi_ulong)i <<
2384 (TARGET_PAGE_BITS + L2_BITS * level));
2385 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2386 if (rc != 0) {
2387 return rc;
2392 return 0;
2395 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2397 struct walk_memory_regions_data data;
2398 unsigned long i;
2400 data.fn = fn;
2401 data.priv = priv;
2402 data.start = -1ul;
2403 data.prot = 0;
2405 for (i = 0; i < V_L1_SIZE; i++) {
2406 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2407 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2408 if (rc != 0) {
2409 return rc;
2413 return walk_memory_regions_end(&data, 0, 0);
2416 static int dump_region(void *priv, abi_ulong start,
2417 abi_ulong end, unsigned long prot)
2419 FILE *f = (FILE *)priv;
2421 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2422 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2423 start, end, end - start,
2424 ((prot & PAGE_READ) ? 'r' : '-'),
2425 ((prot & PAGE_WRITE) ? 'w' : '-'),
2426 ((prot & PAGE_EXEC) ? 'x' : '-'));
2428 return (0);
2431 /* dump memory mappings */
2432 void page_dump(FILE *f)
2434 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2435 "start", "end", "size", "prot");
2436 walk_memory_regions(f, dump_region);
2439 int page_get_flags(target_ulong address)
2441 PageDesc *p;
2443 p = page_find(address >> TARGET_PAGE_BITS);
2444 if (!p)
2445 return 0;
2446 return p->flags;
2449 /* Modify the flags of a page and invalidate the code if necessary.
2450 The flag PAGE_WRITE_ORG is positioned automatically depending
2451 on PAGE_WRITE. The mmap_lock should already be held. */
2452 void page_set_flags(target_ulong start, target_ulong end, int flags)
2454 target_ulong addr, len;
2456 /* This function should never be called with addresses outside the
2457 guest address space. If this assert fires, it probably indicates
2458 a missing call to h2g_valid. */
2459 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2460 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2461 #endif
2462 assert(start < end);
2464 start = start & TARGET_PAGE_MASK;
2465 end = TARGET_PAGE_ALIGN(end);
2467 if (flags & PAGE_WRITE) {
2468 flags |= PAGE_WRITE_ORG;
2471 for (addr = start, len = end - start;
2472 len != 0;
2473 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2474 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2476 /* If the write protection bit is set, then we invalidate
2477 the code inside. */
2478 if (!(p->flags & PAGE_WRITE) &&
2479 (flags & PAGE_WRITE) &&
2480 p->first_tb) {
2481 tb_invalidate_phys_page(addr, 0, NULL);
2483 p->flags = flags;
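/* Illustrative sketch (kept under #if 0): how user-mode emulation code such
   as a target mmap() implementation might publish a freshly mapped, writable
   range and later check access rights.  The addresses are made-up. */
#if 0
{
    target_ulong start = 0x40000000;
    target_ulong len = 0x4000;

    page_set_flags(start, start + len,
                   PAGE_VALID | PAGE_READ | PAGE_WRITE);

    /* e.g. before copying data into guest memory: */
    if (page_check_range(start, len, PAGE_WRITE) < 0) {
        /* range is not writable from the guest's point of view */
    }
}
#endif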
2487 int page_check_range(target_ulong start, target_ulong len, int flags)
2489 PageDesc *p;
2490 target_ulong end;
2491 target_ulong addr;
2493 /* This function should never be called with addresses outside the
2494 guest address space. If this assert fires, it probably indicates
2495 a missing call to h2g_valid. */
2496 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2497 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2498 #endif
2500 if (len == 0) {
2501 return 0;
2503 if (start + len - 1 < start) {
2504 /* We've wrapped around. */
2505 return -1;
2508 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2509 start = start & TARGET_PAGE_MASK;
2511 for (addr = start, len = end - start;
2512 len != 0;
2513 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2514 p = page_find(addr >> TARGET_PAGE_BITS);
2515 if (!p)
2516 return -1;
2517 if (!(p->flags & PAGE_VALID))
2518 return -1;
2520 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2521 return -1;
2522 if (flags & PAGE_WRITE) {
2523 if (!(p->flags & PAGE_WRITE_ORG))
2524 return -1;
2525 /* unprotect the page if it was put read-only because it
2526 contains translated code */
2527 if (!(p->flags & PAGE_WRITE)) {
2528 if (!page_unprotect(addr, 0, NULL))
2529 return -1;
2531 return 0;
2534 return 0;
2537 /* called from signal handler: invalidate the code and unprotect the
2538 page. Return TRUE if the fault was successfully handled. */
2539 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2541 unsigned int prot;
2542 PageDesc *p;
2543 target_ulong host_start, host_end, addr;
2545 /* Technically this isn't safe inside a signal handler. However we
2546 know this only ever happens in a synchronous SEGV handler, so in
2547 practice it seems to be ok. */
2548 mmap_lock();
2550 p = page_find(address >> TARGET_PAGE_BITS);
2551 if (!p) {
2552 mmap_unlock();
2553 return 0;
2556 /* if the page was really writable, then we change its
2557 protection back to writable */
2558 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2559 host_start = address & qemu_host_page_mask;
2560 host_end = host_start + qemu_host_page_size;
2562 prot = 0;
2563 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2564 p = page_find(addr >> TARGET_PAGE_BITS);
2565 p->flags |= PAGE_WRITE;
2566 prot |= p->flags;
2568 /* and since the content will be modified, we must invalidate
2569 the corresponding translated code. */
2570 tb_invalidate_phys_page(addr, pc, puc);
2571 #ifdef DEBUG_TB_CHECK
2572 tb_invalidate_check(addr);
2573 #endif
2575 mprotect((void *)g2h(host_start), qemu_host_page_size,
2576 prot & PAGE_BITS);
2578 mmap_unlock();
2579 return 1;
2581 mmap_unlock();
2582 return 0;
2585 static inline void tlb_set_dirty(CPUState *env,
2586 unsigned long addr, target_ulong vaddr)
2589 #endif /* defined(CONFIG_USER_ONLY) */
2591 #if !defined(CONFIG_USER_ONLY)
2593 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2594 typedef struct subpage_t {
2595 target_phys_addr_t base;
2596 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2597 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2598 } subpage_t;
2600 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2601 ram_addr_t memory, ram_addr_t region_offset);
2602 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2603 ram_addr_t orig_memory,
2604 ram_addr_t region_offset);
2605 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2606 need_subpage) \
2607 do { \
2608 if (addr > start_addr) \
2609 start_addr2 = 0; \
2610 else { \
2611 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2612 if (start_addr2 > 0) \
2613 need_subpage = 1; \
2616 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2617 end_addr2 = TARGET_PAGE_SIZE - 1; \
2618 else { \
2619 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2620 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2621 need_subpage = 1; \
2623 } while (0)
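/* Worked example (assuming the usual 4 KB TARGET_PAGE_SIZE): registering a
   0x200-byte region at start_addr 0x10000100 visits the page at
   addr 0x10000000; CHECK_SUBPAGE yields start_addr2 = 0x100,
   end_addr2 = 0x2ff and sets need_subpage, since only offsets
   0x100..0x2ff of that page are covered by the new region. */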
2625 /* register physical memory.
2626 For RAM, 'size' must be a multiple of the target page size.
2627 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2628 io memory page. The address used when calling the IO function is
2629 the offset from the start of the region, plus region_offset. Both
2630 start_addr and region_offset are rounded down to a page boundary
2631 before calculating this offset. This should not be a problem unless
2632 the low bits of start_addr and region_offset differ. */
2633 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2634 ram_addr_t size,
2635 ram_addr_t phys_offset,
2636 ram_addr_t region_offset,
2637 bool log_dirty)
2639 target_phys_addr_t addr, end_addr;
2640 PhysPageDesc *p;
2641 CPUState *env;
2642 ram_addr_t orig_size = size;
2643 subpage_t *subpage;
2645 assert(size);
2646 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2648 if (phys_offset == IO_MEM_UNASSIGNED) {
2649 region_offset = start_addr;
2651 region_offset &= TARGET_PAGE_MASK;
2652 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2653 end_addr = start_addr + (target_phys_addr_t)size;
2655 addr = start_addr;
2656 do {
2657 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2658 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2659 ram_addr_t orig_memory = p->phys_offset;
2660 target_phys_addr_t start_addr2, end_addr2;
2661 int need_subpage = 0;
2663 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2664 need_subpage);
2665 if (need_subpage) {
2666 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2667 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2668 &p->phys_offset, orig_memory,
2669 p->region_offset);
2670 } else {
2671 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2672 >> IO_MEM_SHIFT];
2674 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2675 region_offset);
2676 p->region_offset = 0;
2677 } else {
2678 p->phys_offset = phys_offset;
2679 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2680 (phys_offset & IO_MEM_ROMD))
2681 phys_offset += TARGET_PAGE_SIZE;
2683 } else {
2684 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2685 p->phys_offset = phys_offset;
2686 p->region_offset = region_offset;
2687 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2688 (phys_offset & IO_MEM_ROMD)) {
2689 phys_offset += TARGET_PAGE_SIZE;
2690 } else {
2691 target_phys_addr_t start_addr2, end_addr2;
2692 int need_subpage = 0;
2694 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2695 end_addr2, need_subpage);
2697 if (need_subpage) {
2698 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2699 &p->phys_offset, IO_MEM_UNASSIGNED,
2700 addr & TARGET_PAGE_MASK);
2701 subpage_register(subpage, start_addr2, end_addr2,
2702 phys_offset, region_offset);
2703 p->region_offset = 0;
2707 region_offset += TARGET_PAGE_SIZE;
2708 addr += TARGET_PAGE_SIZE;
2709 } while (addr != end_addr);
2711 /* since each CPU stores ram addresses in its TLB cache, we must
2712 reset the modified entries */
2713 /* XXX: slow ! */
2714 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2715 tlb_flush(env, 1);
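/* Illustrative sketch (kept under #if 0): how board code typically combines
   qemu_ram_alloc(), cpu_register_io_memory() and the
   cpu_register_physical_memory() wrapper around the function above.  The
   addresses, sizes and the example_read/example_write tables are made-up. */
#if 0
{
    ram_addr_t ram_offset;
    int io_index;

    /* back the first 16 MB of guest-physical address space with host RAM */
    ram_offset = qemu_ram_alloc(NULL, "example.ram", 16 * 1024 * 1024);
    cpu_register_physical_memory(0x00000000, 16 * 1024 * 1024,
                                 ram_offset | IO_MEM_RAM);

    /* map one page of MMIO at 0x10000000 */
    io_index = cpu_register_io_memory(example_read, example_write, NULL,
                                      DEVICE_NATIVE_ENDIAN);
    cpu_register_physical_memory(0x10000000, TARGET_PAGE_SIZE, io_index);
}
#endif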
2719 /* XXX: temporary until new memory mapping API */
2720 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2722 PhysPageDesc *p;
2724 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2725 if (!p)
2726 return IO_MEM_UNASSIGNED;
2727 return p->phys_offset;
2730 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2732 if (kvm_enabled())
2733 kvm_coalesce_mmio_region(addr, size);
2736 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2738 if (kvm_enabled())
2739 kvm_uncoalesce_mmio_region(addr, size);
2742 void qemu_flush_coalesced_mmio_buffer(void)
2744 if (kvm_enabled())
2745 kvm_flush_coalesced_mmio_buffer();
2748 #if defined(__linux__) && !defined(TARGET_S390X)
2750 #include <sys/vfs.h>
2752 #define HUGETLBFS_MAGIC 0x958458f6
2754 static long gethugepagesize(const char *path)
2756 struct statfs fs;
2757 int ret;
2759 do {
2760 ret = statfs(path, &fs);
2761 } while (ret != 0 && errno == EINTR);
2763 if (ret != 0) {
2764 perror(path);
2765 return 0;
2768 if (fs.f_type != HUGETLBFS_MAGIC)
2769 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2771 return fs.f_bsize;
2774 static void *file_ram_alloc(RAMBlock *block,
2775 ram_addr_t memory,
2776 const char *path)
2778 char *filename;
2779 void *area;
2780 int fd;
2781 #ifdef MAP_POPULATE
2782 int flags;
2783 #endif
2784 unsigned long hpagesize;
2786 hpagesize = gethugepagesize(path);
2787 if (!hpagesize) {
2788 return NULL;
2791 if (memory < hpagesize) {
2792 return NULL;
2795 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2796 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2797 return NULL;
2800 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2801 return NULL;
2804 fd = mkstemp(filename);
2805 if (fd < 0) {
2806 perror("unable to create backing store for hugepages");
2807 free(filename);
2808 return NULL;
2810 unlink(filename);
2811 free(filename);
2813 memory = (memory+hpagesize-1) & ~(hpagesize-1);
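/* e.g. with 2 MB huge pages (hpagesize == 0x200000) a 5 MB (0x500000)
   request is rounded up here to 6 MB (0x600000), a whole number of
   huge pages */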
2816 * ftruncate is not supported by hugetlbfs in older
2817 * hosts, so don't bother bailing out on errors.
2818 * If anything goes wrong with it under other filesystems,
2819 * mmap will fail.
2821 if (ftruncate(fd, memory))
2822 perror("ftruncate");
2824 #ifdef MAP_POPULATE
2825 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2826 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2827 * to sidestep this quirk.
2829 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2830 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2831 #else
2832 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2833 #endif
2834 if (area == MAP_FAILED) {
2835 perror("file_ram_alloc: can't mmap RAM pages");
2836 close(fd);
2837 return NULL;
2839 block->fd = fd;
2840 return area;
2842 #endif
2844 static ram_addr_t find_ram_offset(ram_addr_t size)
2846 RAMBlock *block, *next_block;
2847 ram_addr_t offset = 0, mingap = ULONG_MAX;
2849 if (QLIST_EMPTY(&ram_list.blocks))
2850 return 0;
2852 QLIST_FOREACH(block, &ram_list.blocks, next) {
2853 ram_addr_t end, next = ULONG_MAX;
2855 end = block->offset + block->length;
2857 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2858 if (next_block->offset >= end) {
2859 next = MIN(next, next_block->offset);
2862 if (next - end >= size && next - end < mingap) {
2863 offset = end;
2864 mingap = next - end;
2867 return offset;
2870 static ram_addr_t last_ram_offset(void)
2872 RAMBlock *block;
2873 ram_addr_t last = 0;
2875 QLIST_FOREACH(block, &ram_list.blocks, next)
2876 last = MAX(last, block->offset + block->length);
2878 return last;
2881 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2882 ram_addr_t size, void *host)
2884 RAMBlock *new_block, *block;
2886 size = TARGET_PAGE_ALIGN(size);
2887 new_block = qemu_mallocz(sizeof(*new_block));
2889 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2890 char *id = dev->parent_bus->info->get_dev_path(dev);
2891 if (id) {
2892 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2893 qemu_free(id);
2896 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2898 QLIST_FOREACH(block, &ram_list.blocks, next) {
2899 if (!strcmp(block->idstr, new_block->idstr)) {
2900 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2901 new_block->idstr);
2902 abort();
2906 new_block->offset = find_ram_offset(size);
2907 if (host) {
2908 new_block->host = host;
2909 new_block->flags |= RAM_PREALLOC_MASK;
2910 } else {
2911 if (mem_path) {
2912 #if defined (__linux__) && !defined(TARGET_S390X)
2913 new_block->host = file_ram_alloc(new_block, size, mem_path);
2914 if (!new_block->host) {
2915 new_block->host = qemu_vmalloc(size);
2916 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2918 #else
2919 fprintf(stderr, "-mem-path option unsupported\n");
2920 exit(1);
2921 #endif
2922 } else {
2923 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2924 /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
2925 new_block->host = mmap((void*)0x1000000, size,
2926 PROT_EXEC|PROT_READ|PROT_WRITE,
2927 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
2928 #else
2929 if (xen_mapcache_enabled()) {
2930 xen_ram_alloc(new_block->offset, size);
2931 } else {
2932 new_block->host = qemu_vmalloc(size);
2934 #endif
2935 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2938 new_block->length = size;
2940 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2942 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2943 last_ram_offset() >> TARGET_PAGE_BITS);
2944 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2945 0xff, size >> TARGET_PAGE_BITS);
2947 if (kvm_enabled())
2948 kvm_setup_guest_memory(new_block->host, size);
2950 return new_block->offset;
2953 void qemu_ram_unmap(ram_addr_t addr)
2955 RAMBlock *block;
2957 QLIST_FOREACH(block, &ram_list.blocks, next) {
2958 if (addr == block->offset) {
2959 QLIST_REMOVE(block, next);
2960 qemu_free(block);
2961 return;
2966 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2968 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
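/* Illustrative sketch (kept under #if 0): allocating RAM for a device and
   obtaining a host pointer to it.  "mydev.vram" and the size are made-up
   examples; see the comment at qemu_get_ram_ptr() below for the intended
   (device-local) use of such pointers. */
#if 0
{
    ram_addr_t vram_offset;
    uint8_t *vram_ptr;

    vram_offset = qemu_ram_alloc(NULL, "mydev.vram", 8 * 1024 * 1024);
    vram_ptr = qemu_get_ram_ptr(vram_offset);
    /* ... the device uses vram_ptr for its own framebuffer accesses ... */
    qemu_put_ram_ptr(vram_ptr);
}
#endif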
2971 void qemu_ram_free(ram_addr_t addr)
2973 RAMBlock *block;
2975 QLIST_FOREACH(block, &ram_list.blocks, next) {
2976 if (addr == block->offset) {
2977 QLIST_REMOVE(block, next);
2978 if (block->flags & RAM_PREALLOC_MASK) {
2980 } else if (mem_path) {
2981 #if defined (__linux__) && !defined(TARGET_S390X)
2982 if (block->fd) {
2983 munmap(block->host, block->length);
2984 close(block->fd);
2985 } else {
2986 qemu_vfree(block->host);
2988 #else
2989 abort();
2990 #endif
2991 } else {
2992 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2993 munmap(block->host, block->length);
2994 #else
2995 if (xen_mapcache_enabled()) {
2996 qemu_invalidate_entry(block->host);
2997 } else {
2998 qemu_vfree(block->host);
3000 #endif
3002 qemu_free(block);
3003 return;
3009 #ifndef _WIN32
3010 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3012 RAMBlock *block;
3013 ram_addr_t offset;
3014 int flags;
3015 void *area, *vaddr;
3017 QLIST_FOREACH(block, &ram_list.blocks, next) {
3018 offset = addr - block->offset;
3019 if (offset < block->length) {
3020 vaddr = block->host + offset;
3021 if (block->flags & RAM_PREALLOC_MASK) {
3023 } else {
3024 flags = MAP_FIXED;
3025 munmap(vaddr, length);
3026 if (mem_path) {
3027 #if defined(__linux__) && !defined(TARGET_S390X)
3028 if (block->fd) {
3029 #ifdef MAP_POPULATE
3030 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3031 MAP_PRIVATE;
3032 #else
3033 flags |= MAP_PRIVATE;
3034 #endif
3035 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3036 flags, block->fd, offset);
3037 } else {
3038 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3039 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3040 flags, -1, 0);
3042 #else
3043 abort();
3044 #endif
3045 } else {
3046 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3047 flags |= MAP_SHARED | MAP_ANONYMOUS;
3048 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3049 flags, -1, 0);
3050 #else
3051 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3052 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3053 flags, -1, 0);
3054 #endif
3056 if (area != vaddr) {
3057 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3058 length, addr);
3059 exit(1);
3061 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3063 return;
3067 #endif /* !_WIN32 */
3069 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3070 With the exception of the softmmu code in this file, this should
3071 only be used for local memory (e.g. video ram) that the device owns,
3072 and knows it isn't going to access beyond the end of the block.
3074 It should not be used for general purpose DMA.
3075 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3077 void *qemu_get_ram_ptr(ram_addr_t addr)
3079 RAMBlock *block;
3081 QLIST_FOREACH(block, &ram_list.blocks, next) {
3082 if (addr - block->offset < block->length) {
3083 /* Move this entry to the start of the list. */
3084 if (block != QLIST_FIRST(&ram_list.blocks)) {
3085 QLIST_REMOVE(block, next);
3086 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3088 if (xen_mapcache_enabled()) {
3089 /* We need to check if the requested address is in the RAM
3090 * because we don't want to map the entire memory in QEMU.
3092 if (block->offset == 0) {
3093 return qemu_map_cache(addr, 0, 1);
3094 } else if (block->host == NULL) {
3095 block->host = xen_map_block(block->offset, block->length);
3098 return block->host + (addr - block->offset);
3102 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3103 abort();
3105 return NULL;
3108 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3109 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3111 void *qemu_safe_ram_ptr(ram_addr_t addr)
3113 RAMBlock *block;
3115 QLIST_FOREACH(block, &ram_list.blocks, next) {
3116 if (addr - block->offset < block->length) {
3117 if (xen_mapcache_enabled()) {
3118 /* We need to check if the requested address is in the RAM
3119 * because we don't want to map the entire memory in QEMU.
3121 if (block->offset == 0) {
3122 return qemu_map_cache(addr, 0, 1);
3123 } else if (block->host == NULL) {
3124 block->host = xen_map_block(block->offset, block->length);
3127 return block->host + (addr - block->offset);
3131 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3132 abort();
3134 return NULL;
3137 void qemu_put_ram_ptr(void *addr)
3139 trace_qemu_put_ram_ptr(addr);
3141 if (xen_mapcache_enabled()) {
3142 RAMBlock *block;
3144 QLIST_FOREACH(block, &ram_list.blocks, next) {
3145 if (addr == block->host) {
3146 break;
3149 if (block && block->host) {
3150 xen_unmap_block(block->host, block->length);
3151 block->host = NULL;
3152 } else {
3153 qemu_map_cache_unlock(addr);
3158 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3160 RAMBlock *block;
3161 uint8_t *host = ptr;
3163 QLIST_FOREACH(block, &ram_list.blocks, next) {
3164 /* This case can happen when the block is not mapped. */
3165 if (block->host == NULL) {
3166 continue;
3168 if (host - block->host < block->length) {
3169 *ram_addr = block->offset + (host - block->host);
3170 return 0;
3174 if (xen_mapcache_enabled()) {
3175 *ram_addr = qemu_ram_addr_from_mapcache(ptr);
3176 return 0;
3179 return -1;
3182 /* Some of the softmmu routines need to translate from a host pointer
3183 (typically a TLB entry) back to a ram offset. */
3184 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3186 ram_addr_t ram_addr;
3188 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3189 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3190 abort();
3192 return ram_addr;
3195 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3197 #ifdef DEBUG_UNASSIGNED
3198 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3199 #endif
3200 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3201 do_unassigned_access(addr, 0, 0, 0, 1);
3202 #endif
3203 return 0;
3206 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3208 #ifdef DEBUG_UNASSIGNED
3209 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3210 #endif
3211 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3212 do_unassigned_access(addr, 0, 0, 0, 2);
3213 #endif
3214 return 0;
3217 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3219 #ifdef DEBUG_UNASSIGNED
3220 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3221 #endif
3222 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3223 do_unassigned_access(addr, 0, 0, 0, 4);
3224 #endif
3225 return 0;
3228 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3230 #ifdef DEBUG_UNASSIGNED
3231 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3232 #endif
3233 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3234 do_unassigned_access(addr, 1, 0, 0, 1);
3235 #endif
3238 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3240 #ifdef DEBUG_UNASSIGNED
3241 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3242 #endif
3243 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3244 do_unassigned_access(addr, 1, 0, 0, 2);
3245 #endif
3248 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3250 #ifdef DEBUG_UNASSIGNED
3251 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3252 #endif
3253 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3254 do_unassigned_access(addr, 1, 0, 0, 4);
3255 #endif
3258 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3259 unassigned_mem_readb,
3260 unassigned_mem_readw,
3261 unassigned_mem_readl,
3264 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3265 unassigned_mem_writeb,
3266 unassigned_mem_writew,
3267 unassigned_mem_writel,
3270 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3271 uint32_t val)
3273 int dirty_flags;
3274 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3275 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3276 #if !defined(CONFIG_USER_ONLY)
3277 tb_invalidate_phys_page_fast(ram_addr, 1);
3278 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3279 #endif
3281 stb_p(qemu_get_ram_ptr(ram_addr), val);
3282 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3283 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3284 /* we remove the notdirty callback only if the code has been
3285 flushed */
3286 if (dirty_flags == 0xff)
3287 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3290 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3291 uint32_t val)
3293 int dirty_flags;
3294 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3295 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3296 #if !defined(CONFIG_USER_ONLY)
3297 tb_invalidate_phys_page_fast(ram_addr, 2);
3298 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3299 #endif
3301 stw_p(qemu_get_ram_ptr(ram_addr), val);
3302 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3303 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3304 /* we remove the notdirty callback only if the code has been
3305 flushed */
3306 if (dirty_flags == 0xff)
3307 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3310 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3311 uint32_t val)
3313 int dirty_flags;
3314 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3315 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3316 #if !defined(CONFIG_USER_ONLY)
3317 tb_invalidate_phys_page_fast(ram_addr, 4);
3318 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3319 #endif
3321 stl_p(qemu_get_ram_ptr(ram_addr), val);
3322 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3323 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3324 /* we remove the notdirty callback only if the code has been
3325 flushed */
3326 if (dirty_flags == 0xff)
3327 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3330 static CPUReadMemoryFunc * const error_mem_read[3] = {
3331 NULL, /* never used */
3332 NULL, /* never used */
3333 NULL, /* never used */
3336 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3337 notdirty_mem_writeb,
3338 notdirty_mem_writew,
3339 notdirty_mem_writel,
3342 /* Generate a debug exception if a watchpoint has been hit. */
3343 static void check_watchpoint(int offset, int len_mask, int flags)
3345 CPUState *env = cpu_single_env;
3346 target_ulong pc, cs_base;
3347 TranslationBlock *tb;
3348 target_ulong vaddr;
3349 CPUWatchpoint *wp;
3350 int cpu_flags;
3352 if (env->watchpoint_hit) {
3353 /* We re-entered the check after replacing the TB. Now raise
3354 * the debug interrupt so that it will trigger after the
3355 * current instruction. */
3356 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3357 return;
3359 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3360 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3361 if ((vaddr == (wp->vaddr & len_mask) ||
3362 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3363 wp->flags |= BP_WATCHPOINT_HIT;
3364 if (!env->watchpoint_hit) {
3365 env->watchpoint_hit = wp;
3366 tb = tb_find_pc(env->mem_io_pc);
3367 if (!tb) {
3368 cpu_abort(env, "check_watchpoint: could not find TB for "
3369 "pc=%p", (void *)env->mem_io_pc);
3371 cpu_restore_state(tb, env, env->mem_io_pc);
3372 tb_phys_invalidate(tb, -1);
3373 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3374 env->exception_index = EXCP_DEBUG;
3375 } else {
3376 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3377 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3379 cpu_resume_from_signal(env, NULL);
3381 } else {
3382 wp->flags &= ~BP_WATCHPOINT_HIT;
3387 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3388 so these check for a hit then pass through to the normal out-of-line
3389 phys routines. */
3390 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3392 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3393 return ldub_phys(addr);
3396 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3398 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3399 return lduw_phys(addr);
3402 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3404 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3405 return ldl_phys(addr);
3408 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3409 uint32_t val)
3411 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3412 stb_phys(addr, val);
3415 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3416 uint32_t val)
3418 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3419 stw_phys(addr, val);
3422 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3423 uint32_t val)
3425 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3426 stl_phys(addr, val);
3429 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3430 watch_mem_readb,
3431 watch_mem_readw,
3432 watch_mem_readl,
3435 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3436 watch_mem_writeb,
3437 watch_mem_writew,
3438 watch_mem_writel,
3441 static inline uint32_t subpage_readlen (subpage_t *mmio,
3442 target_phys_addr_t addr,
3443 unsigned int len)
3445 unsigned int idx = SUBPAGE_IDX(addr);
3446 #if defined(DEBUG_SUBPAGE)
3447 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3448 mmio, len, addr, idx);
3449 #endif
3451 addr += mmio->region_offset[idx];
3452 idx = mmio->sub_io_index[idx];
3453 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3456 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3457 uint32_t value, unsigned int len)
3459 unsigned int idx = SUBPAGE_IDX(addr);
3460 #if defined(DEBUG_SUBPAGE)
3461 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3462 __func__, mmio, len, addr, idx, value);
3463 #endif
3465 addr += mmio->region_offset[idx];
3466 idx = mmio->sub_io_index[idx];
3467 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3470 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3472 return subpage_readlen(opaque, addr, 0);
3475 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3476 uint32_t value)
3478 subpage_writelen(opaque, addr, value, 0);
3481 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3483 return subpage_readlen(opaque, addr, 1);
3486 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3487 uint32_t value)
3489 subpage_writelen(opaque, addr, value, 1);
3492 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3494 return subpage_readlen(opaque, addr, 2);
3497 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3498 uint32_t value)
3500 subpage_writelen(opaque, addr, value, 2);
3503 static CPUReadMemoryFunc * const subpage_read[] = {
3504 &subpage_readb,
3505 &subpage_readw,
3506 &subpage_readl,
3509 static CPUWriteMemoryFunc * const subpage_write[] = {
3510 &subpage_writeb,
3511 &subpage_writew,
3512 &subpage_writel,
3515 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3516 ram_addr_t memory, ram_addr_t region_offset)
3518 int idx, eidx;
3520 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3521 return -1;
3522 idx = SUBPAGE_IDX(start);
3523 eidx = SUBPAGE_IDX(end);
3524 #if defined(DEBUG_SUBPAGE)
3525 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3526 mmio, start, end, idx, eidx, memory);
3527 #endif
3528 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3529 memory = IO_MEM_UNASSIGNED;
3530 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3531 for (; idx <= eidx; idx++) {
3532 mmio->sub_io_index[idx] = memory;
3533 mmio->region_offset[idx] = region_offset;
3536 return 0;
3539 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3540 ram_addr_t orig_memory,
3541 ram_addr_t region_offset)
3543 subpage_t *mmio;
3544 int subpage_memory;
3546 mmio = qemu_mallocz(sizeof(subpage_t));
3548 mmio->base = base;
3549 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3550 DEVICE_NATIVE_ENDIAN);
3551 #if defined(DEBUG_SUBPAGE)
3552 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3553 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3554 #endif
3555 *phys = subpage_memory | IO_MEM_SUBPAGE;
3556 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3558 return mmio;
3561 static int get_free_io_mem_idx(void)
3563 int i;
3565 for (i = 0; i < IO_MEM_NB_ENTRIES; i++)
3566 if (!io_mem_used[i]) {
3567 io_mem_used[i] = 1;
3568 return i;
3570 fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3571 return -1;
3575 * Usually, devices operate in little endian mode. There are devices out
3576 * there that operate in big endian too. Each device gets byte swapped
3577 * mmio if plugged onto a CPU that does the other endianness.
3579 * CPU Device swap?
3581 * little little no
3582 * little big yes
3583 * big little yes
3584 * big big no
3587 typedef struct SwapEndianContainer {
3588 CPUReadMemoryFunc *read[3];
3589 CPUWriteMemoryFunc *write[3];
3590 void *opaque;
3591 } SwapEndianContainer;
3593 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3595 uint32_t val;
3596 SwapEndianContainer *c = opaque;
3597 val = c->read[0](c->opaque, addr);
3598 return val;
3601 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3603 uint32_t val;
3604 SwapEndianContainer *c = opaque;
3605 val = bswap16(c->read[1](c->opaque, addr));
3606 return val;
3609 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3611 uint32_t val;
3612 SwapEndianContainer *c = opaque;
3613 val = bswap32(c->read[2](c->opaque, addr));
3614 return val;
3617 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3618 swapendian_mem_readb,
3619 swapendian_mem_readw,
3620 swapendian_mem_readl
3623 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3624 uint32_t val)
3626 SwapEndianContainer *c = opaque;
3627 c->write[0](c->opaque, addr, val);
3630 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3631 uint32_t val)
3633 SwapEndianContainer *c = opaque;
3634 c->write[1](c->opaque, addr, bswap16(val));
3637 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3638 uint32_t val)
3640 SwapEndianContainer *c = opaque;
3641 c->write[2](c->opaque, addr, bswap32(val));
3644 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3645 swapendian_mem_writeb,
3646 swapendian_mem_writew,
3647 swapendian_mem_writel
3650 static void swapendian_init(int io_index)
3652 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3653 int i;
3655 /* Swap mmio for big endian targets */
3656 c->opaque = io_mem_opaque[io_index];
3657 for (i = 0; i < 3; i++) {
3658 c->read[i] = io_mem_read[io_index][i];
3659 c->write[i] = io_mem_write[io_index][i];
3661 io_mem_read[io_index][i] = swapendian_readfn[i];
3662 io_mem_write[io_index][i] = swapendian_writefn[i];
3664 io_mem_opaque[io_index] = c;
3667 static void swapendian_del(int io_index)
3669 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3670 qemu_free(io_mem_opaque[io_index]);
3674 /* mem_read and mem_write are arrays of functions containing the
3675 function to access byte (index 0), word (index 1) and dword (index
3676 2). Functions can be omitted with a NULL function pointer.
3677 If io_index is non-zero, the corresponding io zone is
3678 modified. If it is zero, a new io zone is allocated. The return
3679 value can be used with cpu_register_physical_memory(). (-1) is
3680 returned on error. */
3681 static int cpu_register_io_memory_fixed(int io_index,
3682 CPUReadMemoryFunc * const *mem_read,
3683 CPUWriteMemoryFunc * const *mem_write,
3684 void *opaque, enum device_endian endian)
3686 int i;
3688 if (io_index <= 0) {
3689 io_index = get_free_io_mem_idx();
3690 if (io_index == -1)
3691 return io_index;
3692 } else {
3693 io_index >>= IO_MEM_SHIFT;
3694 if (io_index >= IO_MEM_NB_ENTRIES)
3695 return -1;
3698 for (i = 0; i < 3; ++i) {
3699 io_mem_read[io_index][i]
3700 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3702 for (i = 0; i < 3; ++i) {
3703 io_mem_write[io_index][i]
3704 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3706 io_mem_opaque[io_index] = opaque;
3708 switch (endian) {
3709 case DEVICE_BIG_ENDIAN:
3710 #ifndef TARGET_WORDS_BIGENDIAN
3711 swapendian_init(io_index);
3712 #endif
3713 break;
3714 case DEVICE_LITTLE_ENDIAN:
3715 #ifdef TARGET_WORDS_BIGENDIAN
3716 swapendian_init(io_index);
3717 #endif
3718 break;
3719 case DEVICE_NATIVE_ENDIAN:
3720 default:
3721 break;
3724 return (io_index << IO_MEM_SHIFT);
3727 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3728 CPUWriteMemoryFunc * const *mem_write,
3729 void *opaque, enum device_endian endian)
3731 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
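/* Illustrative sketch (kept under #if 0): a device defines per-size access
   callbacks, groups them into the byte/word/dword tables expected here and
   registers them.  MyDevState, mydev_readl(), mydev_writel() and the
   register layout are hypothetical. */
#if 0
static uint32_t mydev_readl(void *opaque, target_phys_addr_t addr)
{
    MyDevState *s = opaque;
    return s->regs[addr >> 2];
}

static void mydev_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    MyDevState *s = opaque;
    s->regs[addr >> 2] = val;
}

/* NULL entries fall back to the unassigned_mem handlers above */
static CPUReadMemoryFunc * const mydev_read[3] = {
    NULL, NULL, mydev_readl,
};
static CPUWriteMemoryFunc * const mydev_write[3] = {
    NULL, NULL, mydev_writel,
};

static void mydev_init_mmio(MyDevState *s)
{
    /* a DEVICE_LITTLE_ENDIAN device is byte-swapped automatically on a
       big-endian target, see swapendian_init() above */
    s->io_index = cpu_register_io_memory(mydev_read, mydev_write, s,
                                         DEVICE_LITTLE_ENDIAN);
}
#endif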
3734 void cpu_unregister_io_memory(int io_table_address)
3736 int i;
3737 int io_index = io_table_address >> IO_MEM_SHIFT;
3739 swapendian_del(io_index);
3741 for (i = 0; i < 3; i++) {
3742 io_mem_read[io_index][i] = unassigned_mem_read[i];
3743 io_mem_write[io_index][i] = unassigned_mem_write[i];
3745 io_mem_opaque[io_index] = NULL;
3746 io_mem_used[io_index] = 0;
3749 static void io_mem_init(void)
3751 int i;
3753 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3754 unassigned_mem_write, NULL,
3755 DEVICE_NATIVE_ENDIAN);
3756 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3757 unassigned_mem_write, NULL,
3758 DEVICE_NATIVE_ENDIAN);
3759 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3760 notdirty_mem_write, NULL,
3761 DEVICE_NATIVE_ENDIAN);
3762 for (i = 0; i < 5; i++)
3763 io_mem_used[i] = 1;
3765 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3766 watch_mem_write, NULL,
3767 DEVICE_NATIVE_ENDIAN);
3770 #endif /* !defined(CONFIG_USER_ONLY) */
3772 /* physical memory access (slow version, mainly for debug) */
3773 #if defined(CONFIG_USER_ONLY)
3774 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3775 uint8_t *buf, int len, int is_write)
3777 int l, flags;
3778 target_ulong page;
3779 void * p;
3781 while (len > 0) {
3782 page = addr & TARGET_PAGE_MASK;
3783 l = (page + TARGET_PAGE_SIZE) - addr;
3784 if (l > len)
3785 l = len;
3786 flags = page_get_flags(page);
3787 if (!(flags & PAGE_VALID))
3788 return -1;
3789 if (is_write) {
3790 if (!(flags & PAGE_WRITE))
3791 return -1;
3792 /* XXX: this code should not depend on lock_user */
3793 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3794 return -1;
3795 memcpy(p, buf, l);
3796 unlock_user(p, addr, l);
3797 } else {
3798 if (!(flags & PAGE_READ))
3799 return -1;
3800 /* XXX: this code should not depend on lock_user */
3801 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3802 return -1;
3803 memcpy(buf, p, l);
3804 unlock_user(p, addr, 0);
3806 len -= l;
3807 buf += l;
3808 addr += l;
3810 return 0;
3813 #else
3814 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3815 int len, int is_write)
3817 int l, io_index;
3818 uint8_t *ptr;
3819 uint32_t val;
3820 target_phys_addr_t page;
3821 unsigned long pd;
3822 PhysPageDesc *p;
3824 while (len > 0) {
3825 page = addr & TARGET_PAGE_MASK;
3826 l = (page + TARGET_PAGE_SIZE) - addr;
3827 if (l > len)
3828 l = len;
3829 p = phys_page_find(page >> TARGET_PAGE_BITS);
3830 if (!p) {
3831 pd = IO_MEM_UNASSIGNED;
3832 } else {
3833 pd = p->phys_offset;
3836 if (is_write) {
3837 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3838 target_phys_addr_t addr1 = addr;
3839 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3840 if (p)
3841 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3842 /* XXX: could force cpu_single_env to NULL to avoid
3843 potential bugs */
3844 if (l >= 4 && ((addr1 & 3) == 0)) {
3845 /* 32 bit write access */
3846 val = ldl_p(buf);
3847 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3848 l = 4;
3849 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3850 /* 16 bit write access */
3851 val = lduw_p(buf);
3852 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3853 l = 2;
3854 } else {
3855 /* 8 bit write access */
3856 val = ldub_p(buf);
3857 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3858 l = 1;
3860 } else {
3861 unsigned long addr1;
3862 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3863 /* RAM case */
3864 ptr = qemu_get_ram_ptr(addr1);
3865 memcpy(ptr, buf, l);
3866 if (!cpu_physical_memory_is_dirty(addr1)) {
3867 /* invalidate code */
3868 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3869 /* set dirty bit */
3870 cpu_physical_memory_set_dirty_flags(
3871 addr1, (0xff & ~CODE_DIRTY_FLAG));
3873 /* qemu doesn't execute guest code directly, but kvm does,
3874 so flush the instruction caches */
3875 if (kvm_enabled())
3876 flush_icache_range((unsigned long)ptr,
3877 ((unsigned long)ptr)+l);
3878 qemu_put_ram_ptr(ptr);
3880 } else {
3881 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3882 !(pd & IO_MEM_ROMD)) {
3883 target_phys_addr_t addr1 = addr;
3884 /* I/O case */
3885 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3886 if (p)
3887 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3888 if (l >= 4 && ((addr1 & 3) == 0)) {
3889 /* 32 bit read access */
3890 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3891 stl_p(buf, val);
3892 l = 4;
3893 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3894 /* 16 bit read access */
3895 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3896 stw_p(buf, val);
3897 l = 2;
3898 } else {
3899 /* 8 bit read access */
3900 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3901 stb_p(buf, val);
3902 l = 1;
3904 } else {
3905 /* RAM case */
3906 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3907 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3908 qemu_put_ram_ptr(ptr);
3911 len -= l;
3912 buf += l;
3913 addr += l;
3917 /* used for ROM loading : can write in RAM and ROM */
3918 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3919 const uint8_t *buf, int len)
3921 int l;
3922 uint8_t *ptr;
3923 target_phys_addr_t page;
3924 unsigned long pd;
3925 PhysPageDesc *p;
3927 while (len > 0) {
3928 page = addr & TARGET_PAGE_MASK;
3929 l = (page + TARGET_PAGE_SIZE) - addr;
3930 if (l > len)
3931 l = len;
3932 p = phys_page_find(page >> TARGET_PAGE_BITS);
3933 if (!p) {
3934 pd = IO_MEM_UNASSIGNED;
3935 } else {
3936 pd = p->phys_offset;
3939 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3940 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3941 !(pd & IO_MEM_ROMD)) {
3942 /* do nothing */
3943 } else {
3944 unsigned long addr1;
3945 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3946 /* ROM/RAM case */
3947 ptr = qemu_get_ram_ptr(addr1);
3948 memcpy(ptr, buf, l);
3949 qemu_put_ram_ptr(ptr);
3951 len -= l;
3952 buf += l;
3953 addr += l;
3957 typedef struct {
3958 void *buffer;
3959 target_phys_addr_t addr;
3960 target_phys_addr_t len;
3961 } BounceBuffer;
3963 static BounceBuffer bounce;
3965 typedef struct MapClient {
3966 void *opaque;
3967 void (*callback)(void *opaque);
3968 QLIST_ENTRY(MapClient) link;
3969 } MapClient;
3971 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3972 = QLIST_HEAD_INITIALIZER(map_client_list);
3974 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3976 MapClient *client = qemu_malloc(sizeof(*client));
3978 client->opaque = opaque;
3979 client->callback = callback;
3980 QLIST_INSERT_HEAD(&map_client_list, client, link);
3981 return client;
3984 void cpu_unregister_map_client(void *_client)
3986 MapClient *client = (MapClient *)_client;
3988 QLIST_REMOVE(client, link);
3989 qemu_free(client);
3992 static void cpu_notify_map_clients(void)
3994 MapClient *client;
3996 while (!QLIST_EMPTY(&map_client_list)) {
3997 client = QLIST_FIRST(&map_client_list);
3998 client->callback(client->opaque);
3999 cpu_unregister_map_client(client);
4003 /* Map a physical memory region into a host virtual address.
4004 * May map a subset of the requested range, given by and returned in *plen.
4005 * May return NULL if resources needed to perform the mapping are exhausted.
4006 * Use only for reads OR writes - not for read-modify-write operations.
4007 * Use cpu_register_map_client() to know when retrying the map operation is
4008 * likely to succeed.
4010 void *cpu_physical_memory_map(target_phys_addr_t addr,
4011 target_phys_addr_t *plen,
4012 int is_write)
4014 target_phys_addr_t len = *plen;
4015 target_phys_addr_t done = 0;
4016 int l;
4017 uint8_t *ret = NULL;
4018 uint8_t *ptr;
4019 target_phys_addr_t page;
4020 unsigned long pd;
4021 PhysPageDesc *p;
4022 unsigned long addr1;
4024 while (len > 0) {
4025 page = addr & TARGET_PAGE_MASK;
4026 l = (page + TARGET_PAGE_SIZE) - addr;
4027 if (l > len)
4028 l = len;
4029 p = phys_page_find(page >> TARGET_PAGE_BITS);
4030 if (!p) {
4031 pd = IO_MEM_UNASSIGNED;
4032 } else {
4033 pd = p->phys_offset;
4036 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4037 if (done || bounce.buffer) {
4038 break;
4040 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4041 bounce.addr = addr;
4042 bounce.len = l;
4043 if (!is_write) {
4044 cpu_physical_memory_read(addr, bounce.buffer, l);
4046 ptr = bounce.buffer;
4047 } else {
4048 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4049 ptr = qemu_get_ram_ptr(addr1);
4051 if (!done) {
4052 ret = ptr;
4053 } else if (ret + done != ptr) {
4054 break;
4057 len -= l;
4058 addr += l;
4059 done += l;
4061 *plen = done;
4062 return ret;
4065 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4066 * Will also mark the memory as dirty if is_write == 1. access_len gives
4067 * the amount of memory that was actually read or written by the caller.
4069 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4070 int is_write, target_phys_addr_t access_len)
4072 unsigned long flush_len = (unsigned long)access_len;
4074 if (buffer != bounce.buffer) {
4075 if (is_write) {
4076 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4077 while (access_len) {
4078 unsigned l;
4079 l = TARGET_PAGE_SIZE;
4080 if (l > access_len)
4081 l = access_len;
4082 if (!cpu_physical_memory_is_dirty(addr1)) {
4083 /* invalidate code */
4084 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4085 /* set dirty bit */
4086 cpu_physical_memory_set_dirty_flags(
4087 addr1, (0xff & ~CODE_DIRTY_FLAG));
4089 addr1 += l;
4090 access_len -= l;
4092 dma_flush_range((unsigned long)buffer,
4093 (unsigned long)buffer + flush_len);
4095 if (xen_mapcache_enabled()) {
4096 uint8_t *buffer1 = buffer;
4097 uint8_t *end_buffer = buffer + len;
4099 while (buffer1 < end_buffer) {
4100 qemu_put_ram_ptr(buffer1);
4101 buffer1 += TARGET_PAGE_SIZE;
4104 return;
4106 if (is_write) {
4107 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4109 qemu_vfree(bounce.buffer);
4110 bounce.buffer = NULL;
4111 cpu_notify_map_clients();
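/* Illustrative sketch (kept under #if 0): the intended map / access / unmap
   cycle for DMA-style accesses.  do_dma_read(), retry_dma and opaque are
   hypothetical; maplen may come back smaller than the requested length if
   the range is not contiguous RAM. */
#if 0
{
    target_phys_addr_t addr = 0x20000000;    /* made-up guest address */
    target_phys_addr_t len = 4096, maplen = len;
    void *host;

    host = cpu_physical_memory_map(addr, &maplen, 1 /* is_write */);
    if (host) {
        do_dma_read(host, maplen);           /* hypothetical device access */
        cpu_physical_memory_unmap(host, maplen, 1, maplen);
    } else {
        /* resources exhausted: register a callback and retry later */
        cpu_register_map_client(opaque, retry_dma);
    }
}
#endif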
4114 /* warning: addr must be aligned */
4115 uint32_t ldl_phys(target_phys_addr_t addr)
4117 int io_index;
4118 uint8_t *ptr;
4119 uint32_t val;
4120 unsigned long pd;
4121 PhysPageDesc *p;
4123 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4124 if (!p) {
4125 pd = IO_MEM_UNASSIGNED;
4126 } else {
4127 pd = p->phys_offset;
4130 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4131 !(pd & IO_MEM_ROMD)) {
4132 /* I/O case */
4133 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4134 if (p)
4135 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4136 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4137 } else {
4138 /* RAM case */
4139 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4140 (addr & ~TARGET_PAGE_MASK);
4141 val = ldl_p(ptr);
4143 return val;
4146 /* warning: addr must be aligned */
4147 uint64_t ldq_phys(target_phys_addr_t addr)
4149 int io_index;
4150 uint8_t *ptr;
4151 uint64_t val;
4152 unsigned long pd;
4153 PhysPageDesc *p;
4155 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4156 if (!p) {
4157 pd = IO_MEM_UNASSIGNED;
4158 } else {
4159 pd = p->phys_offset;
4162 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4163 !(pd & IO_MEM_ROMD)) {
4164 /* I/O case */
4165 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4166 if (p)
4167 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4168 #ifdef TARGET_WORDS_BIGENDIAN
4169 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4170 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4171 #else
4172 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4173 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4174 #endif
4175 } else {
4176 /* RAM case */
4177 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4178 (addr & ~TARGET_PAGE_MASK);
4179 val = ldq_p(ptr);
4181 return val;
4184 /* XXX: optimize */
4185 uint32_t ldub_phys(target_phys_addr_t addr)
4187 uint8_t val;
4188 cpu_physical_memory_read(addr, &val, 1);
4189 return val;
/* warning: addr must be aligned */
uint32_t lduw_phys(target_phys_addr_t addr)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        val = lduw_p(ptr);
    }
    return val;
}
/* warning: addr must be aligned.  The ram page is not marked as dirty
   and the code inside is not invalidated.  It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}
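
#if 0
/* Illustrative sketch only (not part of the original file): the typical
 * caller of stl_phys_notdirty() is a target MMU page-table walker that
 * wants to set the accessed/dirty bits of a guest PTE.  Using stl_phys()
 * there would mark the page-table page dirty and invalidate translated
 * code on it, which is exactly what the walker is trying to avoid.
 * PTE_ACCESSED and the helper name are made up for the example.
 */
#define PTE_ACCESSED 0x20
static void example_mark_pte_accessed(target_phys_addr_t pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    if (!(pte & PTE_ACCESSED)) {
        pte |= PTE_ACCESSED;
        stl_phys_notdirty(pte_addr, pte);
    }
}
#endif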
void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
#else
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
#endif
    } else {
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        stq_p(ptr, val);
    }
}
/* warning: addr must be aligned */
void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}
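
#if 0
/* Illustrative sketch only (not part of the original file): in contrast to
 * stl_phys_notdirty() above, stl_phys() is what a device model should use
 * when it stores into guest RAM on its own behalf -- the write lands in the
 * dirty bitmap (so migration sees it) and any translated code on that page
 * is invalidated.  The ring layout and names here are invented for the
 * example.
 */
static void example_complete_request(target_phys_addr_t ring_base,
                                     uint32_t slot, uint32_t status)
{
    /* each hypothetical ring slot is 8 bytes: 4 bytes status, 4 bytes id */
    target_phys_addr_t slot_addr = ring_base + (target_phys_addr_t)slot * 8;

    stl_phys(slot_addr, status);
}
#endif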
/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}
/* warning: addr must be aligned */
void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        stw_p(ptr, val);
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}
/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
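
#if 0
/* Illustrative sketch only (not part of the original file): this is the
 * kind of call the gdb stub or a monitor command makes to peek at guest
 * virtual memory.  A return of -1 means no physical page is currently
 * mapped at that virtual address.  The helper name is made up for the
 * example.
 */
static int example_peek_guest_vaddr(CPUState *env, target_ulong vaddr,
                                    uint8_t *out, int len)
{
    if (cpu_memory_rw_debug(env, vaddr, out, len, 0 /* read */) < 0) {
        return -1;              /* virtual address not mapped */
    }
    return len;
}
#endif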
#endif
/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUState *env, void *retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc((unsigned long)retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, (unsigned long)retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}
#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
#ifdef CONFIG_PROFILER
    tcg_dump_info(f, cpu_fprintf);
#endif
}
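
#if 0
/* Illustrative sketch only (not part of the original file): dump_exec_info()
 * takes any fprintf-like callback, so the translation-cache statistics can
 * be written straight to stdout; the monitor's "info jit" command passes
 * its own printer in the same way.  The wrapper name is made up for the
 * example.
 */
static void example_dump_jit_stats(void)
{
    dump_exec_info(stdout, fprintf);
}
#endif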
#define MMUSUFFIX _cmmu
#define GETPC() NULL
#define env cpu_single_env
#define SOFTMMU_CODE_ACCESS

#define SHIFT 0
#include "softmmu_template.h"

#define SHIFT 1
#include "softmmu_template.h"

#define SHIFT 2
#include "softmmu_template.h"

#define SHIFT 3
#include "softmmu_template.h"

#undef env

#endif