fix event fallout in grlib_apbuart.c
[qemu/aliguori.git] / exec.c
blob63adb189e74bf4deea2ed01b7dd36b71868a0d75
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
/* Compile-time debug switches; all of them are disabled here. */
//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_TLB
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK
//#define DEBUG_TLB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#ifndef CONFIG_USER_ONLY
/* The TB consistency checks exist for usermode emulation only, so the
   switch is forced off everywhere else. */
#undef DEBUG_TB_CHECK
#endif
77 #define SMC_BITMAP_USE_THRESHOLD 10
79 static TranslationBlock *tbs;
80 static int code_gen_max_blocks;
81 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
82 static int nb_tbs;
83 /* any access to the tbs or the page table must use this lock */
84 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
/* Placement/alignment attributes applied to code_gen_prologue. */
#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
   have limited branch ranges (possibly also PPC) so place it in a
   section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32)
/* Maximum alignment for Win32 is 16. */
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
/* Everywhere else: default section, 32-byte alignment. */
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif
102 uint8_t code_gen_prologue[1024] code_gen_section;
103 static uint8_t *code_gen_buffer;
104 static unsigned long code_gen_buffer_size;
105 /* threshold to flush the translated code buffer */
106 static unsigned long code_gen_buffer_max_size;
107 static uint8_t *code_gen_ptr;
109 #if !defined(CONFIG_USER_ONLY)
110 int phys_ram_fd;
111 static int in_migration;
113 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
115 static MemoryRegion *system_memory;
116 static MemoryRegion *system_io;
118 #endif
120 CPUState *first_cpu;
121 /* current CPU in the current thread. It is only valid inside
122 cpu_exec() */
123 CPUState *cpu_single_env;
124 /* 0 = Do not count executed instructions.
125 1 = Precise instruction counting.
126 2 = Adaptive rate instruction counting. */
127 int use_icount = 0;
128 /* Current instruction counter. While executing translated code this may
129 include some instructions that have not yet been executed. */
130 int64_t qemu_icount;
132 typedef struct PageDesc {
133 /* list of TBs intersecting this ram page */
134 TranslationBlock *first_tb;
135 /* in order to optimize self modifying code, we count the number
136 of lookups we do to a given page to use a bitmap */
137 unsigned int code_write_count;
138 uint8_t *code_bitmap;
139 #if defined(CONFIG_USER_ONLY)
140 unsigned long flags;
141 #endif
142 } PageDesc;
/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if defined(CONFIG_USER_ONLY)
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#elif HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

/* The bits remaining after N lower levels of page tables.  */
#define P_L1_BITS_REM \
    ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

/* Size of the L1 page table.  Avoid silly small sizes: a remainder
   below 4 bits is merged with one full L2 level.  */
#if P_L1_BITS_REM < 4
#define P_L1_BITS  (P_L1_BITS_REM + L2_BITS)
#else
#define P_L1_BITS  P_L1_BITS_REM
#endif

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

/* number of entries in each level-1 table */
#define P_L1_SIZE  ((target_phys_addr_t)1 << P_L1_BITS)
#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

/* right shift that turns a page index into a level-1 table index */
#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
185 unsigned long qemu_real_host_page_size;
186 unsigned long qemu_host_page_bits;
187 unsigned long qemu_host_page_size;
188 unsigned long qemu_host_page_mask;
190 /* This is a multi-level map on the virtual address space.
191 The bottom level has pointers to PageDesc. */
192 static void *l1_map[V_L1_SIZE];
194 #if !defined(CONFIG_USER_ONLY)
195 typedef struct PhysPageDesc {
196 /* offset in host memory of the page + io_index in the low bits */
197 ram_addr_t phys_offset;
198 ram_addr_t region_offset;
199 } PhysPageDesc;
201 /* This is a multi-level map on the physical address space.
202 The bottom level has pointers to PhysPageDesc. */
203 static void *l1_phys_map[P_L1_SIZE];
205 static void io_mem_init(void);
206 static void memory_map_init(void);
208 /* io memory support */
209 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
210 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
211 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
212 static char io_mem_used[IO_MEM_NB_ENTRIES];
213 static int io_mem_watch;
214 #endif
/* log support */
/* Default log file name.  The test accepts _WIN32, the macro used by
   every other Windows check in this file, and still honours WIN32,
   which compilers only predefine outside strict ISO mode. */
#if defined(_WIN32) || defined(WIN32)
static const char *logfilename = "qemu.log";
#else
static const char *logfilename = "/tmp/qemu.log";
#endif
FILE *logfile;
int loglevel;
static int log_append = 0;
/* statistics: event counters */
#ifndef CONFIG_USER_ONLY
static int tlb_flush_count;
#endif
/* incremented by tb_flush() */
static int tb_flush_count;
/* incremented by tb_phys_invalidate() */
static int tb_phys_invalidate_count;
233 #ifdef _WIN32
234 static void map_exec(void *addr, long size)
236 DWORD old_protect;
237 VirtualProtect(addr, size,
238 PAGE_EXECUTE_READWRITE, &old_protect);
241 #else
/* Ask for read/write/execute protection on the host pages that cover
   [addr, addr + size).  The result of mprotect() is not checked. */
static void map_exec(void *addr, long size)
{
    const unsigned long page_size = getpagesize();
    const unsigned long page_mask = ~(page_size - 1);
    /* round the start down and the end up to host page boundaries */
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long limit = ((unsigned long)addr + size + page_size - 1)
                          & page_mask;

    mprotect((void *)first, limit - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
257 #endif
259 static void page_init(void)
261 /* NOTE: we can always suppose that qemu_host_page_size >=
262 TARGET_PAGE_SIZE */
263 #ifdef _WIN32
265 SYSTEM_INFO system_info;
267 GetSystemInfo(&system_info);
268 qemu_real_host_page_size = system_info.dwPageSize;
270 #else
271 qemu_real_host_page_size = getpagesize();
272 #endif
273 if (qemu_host_page_size == 0)
274 qemu_host_page_size = qemu_real_host_page_size;
275 if (qemu_host_page_size < TARGET_PAGE_SIZE)
276 qemu_host_page_size = TARGET_PAGE_SIZE;
277 qemu_host_page_bits = 0;
278 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
279 qemu_host_page_bits++;
280 qemu_host_page_mask = ~(qemu_host_page_size - 1);
282 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
284 #ifdef HAVE_KINFO_GETVMMAP
285 struct kinfo_vmentry *freep;
286 int i, cnt;
288 freep = kinfo_getvmmap(getpid(), &cnt);
289 if (freep) {
290 mmap_lock();
291 for (i = 0; i < cnt; i++) {
292 unsigned long startaddr, endaddr;
294 startaddr = freep[i].kve_start;
295 endaddr = freep[i].kve_end;
296 if (h2g_valid(startaddr)) {
297 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
299 if (h2g_valid(endaddr)) {
300 endaddr = h2g(endaddr);
301 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
302 } else {
303 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
304 endaddr = ~0ul;
305 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
306 #endif
310 free(freep);
311 mmap_unlock();
313 #else
314 FILE *f;
316 last_brk = (unsigned long)sbrk(0);
318 f = fopen("/compat/linux/proc/self/maps", "r");
319 if (f) {
320 mmap_lock();
322 do {
323 unsigned long startaddr, endaddr;
324 int n;
326 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
328 if (n == 2 && h2g_valid(startaddr)) {
329 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
331 if (h2g_valid(endaddr)) {
332 endaddr = h2g(endaddr);
333 } else {
334 endaddr = ~0ul;
336 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
338 } while (!feof(f));
340 fclose(f);
341 mmap_unlock();
343 #endif
345 #endif
348 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
350 PageDesc *pd;
351 void **lp;
352 int i;
354 #if defined(CONFIG_USER_ONLY)
355 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
356 # define ALLOC(P, SIZE) \
357 do { \
358 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
359 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
360 } while (0)
361 #else
362 # define ALLOC(P, SIZE) \
363 do { P = qemu_mallocz(SIZE); } while (0)
364 #endif
366 /* Level 1. Always allocated. */
367 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
369 /* Level 2..N-1. */
370 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
371 void **p = *lp;
373 if (p == NULL) {
374 if (!alloc) {
375 return NULL;
377 ALLOC(p, sizeof(void *) * L2_SIZE);
378 *lp = p;
381 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
384 pd = *lp;
385 if (pd == NULL) {
386 if (!alloc) {
387 return NULL;
389 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
390 *lp = pd;
393 #undef ALLOC
395 return pd + (index & (L2_SIZE - 1));
398 static inline PageDesc *page_find(tb_page_addr_t index)
400 return page_find_alloc(index, 0);
403 #if !defined(CONFIG_USER_ONLY)
404 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
406 PhysPageDesc *pd;
407 void **lp;
408 int i;
410 /* Level 1. Always allocated. */
411 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
413 /* Level 2..N-1. */
414 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
415 void **p = *lp;
416 if (p == NULL) {
417 if (!alloc) {
418 return NULL;
420 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
422 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
425 pd = *lp;
426 if (pd == NULL) {
427 int i;
429 if (!alloc) {
430 return NULL;
433 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
435 for (i = 0; i < L2_SIZE; i++) {
436 pd[i].phys_offset = IO_MEM_UNASSIGNED;
437 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
441 return pd + (index & (L2_SIZE - 1));
444 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
446 return phys_page_find_alloc(index, 0);
449 static void tlb_protect_code(ram_addr_t ram_addr);
450 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
451 target_ulong vaddr);
452 #define mmap_lock() do { } while(0)
453 #define mmap_unlock() do { } while(0)
454 #endif
/* default size in bytes of the translated code buffer (32 MiB) */
#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#ifdef CONFIG_USER_ONLY
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc will be used */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#if defined(USE_STATIC_CODE_GEN_BUFFER)
/* statically allocated code buffer, used instead of a run-time mapping */
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif
469 static void code_gen_alloc(unsigned long tb_size)
471 #ifdef USE_STATIC_CODE_GEN_BUFFER
472 code_gen_buffer = static_code_gen_buffer;
473 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
474 map_exec(code_gen_buffer, code_gen_buffer_size);
475 #else
476 code_gen_buffer_size = tb_size;
477 if (code_gen_buffer_size == 0) {
478 #if defined(CONFIG_USER_ONLY)
479 /* in user mode, phys_ram_size is not meaningful */
480 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
481 #else
482 /* XXX: needs adjustments */
483 code_gen_buffer_size = (unsigned long)(ram_size / 4);
484 #endif
486 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
487 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
488 /* The code gen buffer location may have constraints depending on
489 the host cpu and OS */
490 #if defined(__linux__)
492 int flags;
493 void *start = NULL;
495 flags = MAP_PRIVATE | MAP_ANONYMOUS;
496 #if defined(__x86_64__)
497 flags |= MAP_32BIT;
498 /* Cannot map more than that */
499 if (code_gen_buffer_size > (800 * 1024 * 1024))
500 code_gen_buffer_size = (800 * 1024 * 1024);
501 #elif defined(__sparc_v9__)
502 // Map the buffer below 2G, so we can use direct calls and branches
503 flags |= MAP_FIXED;
504 start = (void *) 0x60000000UL;
505 if (code_gen_buffer_size > (512 * 1024 * 1024))
506 code_gen_buffer_size = (512 * 1024 * 1024);
507 #elif defined(__arm__)
508 /* Map the buffer below 32M, so we can use direct calls and branches */
509 flags |= MAP_FIXED;
510 start = (void *) 0x01000000UL;
511 if (code_gen_buffer_size > 16 * 1024 * 1024)
512 code_gen_buffer_size = 16 * 1024 * 1024;
513 #elif defined(__s390x__)
514 /* Map the buffer so that we can use direct calls and branches. */
515 /* We have a +- 4GB range on the branches; leave some slop. */
516 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
517 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
519 start = (void *)0x90000000UL;
520 #endif
521 code_gen_buffer = mmap(start, code_gen_buffer_size,
522 PROT_WRITE | PROT_READ | PROT_EXEC,
523 flags, -1, 0);
524 if (code_gen_buffer == MAP_FAILED) {
525 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
526 exit(1);
529 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
530 || defined(__DragonFly__) || defined(__OpenBSD__) \
531 || defined(__NetBSD__)
533 int flags;
534 void *addr = NULL;
535 flags = MAP_PRIVATE | MAP_ANONYMOUS;
536 #if defined(__x86_64__)
537 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
538 * 0x40000000 is free */
539 flags |= MAP_FIXED;
540 addr = (void *)0x40000000;
541 /* Cannot map more than that */
542 if (code_gen_buffer_size > (800 * 1024 * 1024))
543 code_gen_buffer_size = (800 * 1024 * 1024);
544 #elif defined(__sparc_v9__)
545 // Map the buffer below 2G, so we can use direct calls and branches
546 flags |= MAP_FIXED;
547 addr = (void *) 0x60000000UL;
548 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
549 code_gen_buffer_size = (512 * 1024 * 1024);
551 #endif
552 code_gen_buffer = mmap(addr, code_gen_buffer_size,
553 PROT_WRITE | PROT_READ | PROT_EXEC,
554 flags, -1, 0);
555 if (code_gen_buffer == MAP_FAILED) {
556 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
557 exit(1);
560 #else
561 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
562 map_exec(code_gen_buffer, code_gen_buffer_size);
563 #endif
564 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
565 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
566 code_gen_buffer_max_size = code_gen_buffer_size -
567 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
568 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
569 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
572 /* Must be called before using the QEMU cpus. 'tb_size' is the size
573 (in bytes) allocated to the translation buffer. Zero means default
574 size. */
575 void tcg_exec_init(unsigned long tb_size)
577 cpu_gen_init();
578 code_gen_alloc(tb_size);
579 code_gen_ptr = code_gen_buffer;
580 page_init();
581 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
582 /* There's no guest base to take into account, so go ahead and
583 initialize the prologue now. */
584 tcg_prologue_init(&tcg_ctx);
585 #endif
588 bool tcg_enabled(void)
590 return code_gen_buffer != NULL;
/* Global initialisation: in system emulation, set up the memory map
   and then the io memory tables; in user mode there is nothing to do. */
void cpu_exec_init_all(void)
{
#ifndef CONFIG_USER_ONLY
    memory_map_init();
    io_mem_init();
#endif
}
601 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
603 static int cpu_common_post_load(void *opaque, int version_id)
605 CPUState *env = opaque;
607 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
608 version_id is increased. */
609 env->interrupt_request &= ~0x01;
610 tlb_flush(env, 1);
612 return 0;
615 static const VMStateDescription vmstate_cpu_common = {
616 .name = "cpu_common",
617 .version_id = 1,
618 .minimum_version_id = 1,
619 .minimum_version_id_old = 1,
620 .post_load = cpu_common_post_load,
621 .fields = (VMStateField []) {
622 VMSTATE_UINT32(halted, CPUState),
623 VMSTATE_UINT32(interrupt_request, CPUState),
624 VMSTATE_END_OF_LIST()
627 #endif
629 CPUState *qemu_get_cpu(int cpu)
631 CPUState *env = first_cpu;
633 while (env) {
634 if (env->cpu_index == cpu)
635 break;
636 env = env->next_cpu;
639 return env;
642 void cpu_exec_init(CPUState *env)
644 CPUState **penv;
645 int cpu_index;
647 #if defined(CONFIG_USER_ONLY)
648 cpu_list_lock();
649 #endif
650 env->next_cpu = NULL;
651 penv = &first_cpu;
652 cpu_index = 0;
653 while (*penv != NULL) {
654 penv = &(*penv)->next_cpu;
655 cpu_index++;
657 env->cpu_index = cpu_index;
658 env->numa_node = 0;
659 QTAILQ_INIT(&env->breakpoints);
660 QTAILQ_INIT(&env->watchpoints);
661 #ifndef CONFIG_USER_ONLY
662 env->thread_id = qemu_get_thread_id();
663 #endif
664 *penv = env;
665 #if defined(CONFIG_USER_ONLY)
666 cpu_list_unlock();
667 #endif
668 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
669 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
670 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
671 cpu_save, cpu_load, env);
672 #endif
675 /* Allocate a new translation block. Flush the translation buffer if
676 too many translation blocks or too much generated code. */
677 static TranslationBlock *tb_alloc(target_ulong pc)
679 TranslationBlock *tb;
681 if (nb_tbs >= code_gen_max_blocks ||
682 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
683 return NULL;
684 tb = &tbs[nb_tbs++];
685 tb->pc = pc;
686 tb->cflags = 0;
687 return tb;
690 void tb_free(TranslationBlock *tb)
692 /* In practice this is mostly used for single use temporary TB
693 Ignore the hard cases and just back up if this TB happens to
694 be the last one generated. */
695 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
696 code_gen_ptr = tb->tc_ptr;
697 nb_tbs--;
701 static inline void invalidate_page_bitmap(PageDesc *p)
703 if (p->code_bitmap) {
704 qemu_free(p->code_bitmap);
705 p->code_bitmap = NULL;
707 p->code_write_count = 0;
710 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
712 static void page_flush_tb_1 (int level, void **lp)
714 int i;
716 if (*lp == NULL) {
717 return;
719 if (level == 0) {
720 PageDesc *pd = *lp;
721 for (i = 0; i < L2_SIZE; ++i) {
722 pd[i].first_tb = NULL;
723 invalidate_page_bitmap(pd + i);
725 } else {
726 void **pp = *lp;
727 for (i = 0; i < L2_SIZE; ++i) {
728 page_flush_tb_1 (level - 1, pp + i);
733 static void page_flush_tb(void)
735 int i;
736 for (i = 0; i < V_L1_SIZE; i++) {
737 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
741 /* flush all the translation blocks */
742 /* XXX: tb_flush is currently not thread safe */
743 void tb_flush(CPUState *env1)
745 CPUState *env;
746 #if defined(DEBUG_FLUSH)
747 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
748 (unsigned long)(code_gen_ptr - code_gen_buffer),
749 nb_tbs, nb_tbs > 0 ?
750 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
751 #endif
752 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
753 cpu_abort(env1, "Internal error: code buffer overflow\n");
755 nb_tbs = 0;
757 for(env = first_cpu; env != NULL; env = env->next_cpu) {
758 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
761 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
762 page_flush_tb();
764 code_gen_ptr = code_gen_buffer;
765 /* XXX: flush processor icache at this point if cache flush is
766 expensive */
767 tb_flush_count++;
770 #ifdef DEBUG_TB_CHECK
772 static void tb_invalidate_check(target_ulong address)
774 TranslationBlock *tb;
775 int i;
776 address &= TARGET_PAGE_MASK;
777 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
778 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
779 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
780 address >= tb->pc + tb->size)) {
781 printf("ERROR invalidate: address=" TARGET_FMT_lx
782 " PC=%08lx size=%04x\n",
783 address, (long)tb->pc, tb->size);
789 /* verify that all the pages have correct rights for code */
790 static void tb_page_check(void)
792 TranslationBlock *tb;
793 int i, flags1, flags2;
795 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
796 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
797 flags1 = page_get_flags(tb->pc);
798 flags2 = page_get_flags(tb->pc + tb->size - 1);
799 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
800 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
801 (long)tb->pc, tb->size, flags1, flags2);
807 #endif
809 /* invalidate one TB */
810 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
811 int next_offset)
813 TranslationBlock *tb1;
814 for(;;) {
815 tb1 = *ptb;
816 if (tb1 == tb) {
817 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
818 break;
820 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
824 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
826 TranslationBlock *tb1;
827 unsigned int n1;
829 for(;;) {
830 tb1 = *ptb;
831 n1 = (long)tb1 & 3;
832 tb1 = (TranslationBlock *)((long)tb1 & ~3);
833 if (tb1 == tb) {
834 *ptb = tb1->page_next[n1];
835 break;
837 ptb = &tb1->page_next[n1];
841 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
843 TranslationBlock *tb1, **ptb;
844 unsigned int n1;
846 ptb = &tb->jmp_next[n];
847 tb1 = *ptb;
848 if (tb1) {
849 /* find tb(n) in circular list */
850 for(;;) {
851 tb1 = *ptb;
852 n1 = (long)tb1 & 3;
853 tb1 = (TranslationBlock *)((long)tb1 & ~3);
854 if (n1 == n && tb1 == tb)
855 break;
856 if (n1 == 2) {
857 ptb = &tb1->jmp_first;
858 } else {
859 ptb = &tb1->jmp_next[n1];
862 /* now we can suppress tb(n) from the list */
863 *ptb = tb->jmp_next[n];
865 tb->jmp_next[n] = NULL;
869 /* reset the jump entry 'n' of a TB so that it is not chained to
870 another TB */
871 static inline void tb_reset_jump(TranslationBlock *tb, int n)
873 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
876 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
878 CPUState *env;
879 PageDesc *p;
880 unsigned int h, n1;
881 tb_page_addr_t phys_pc;
882 TranslationBlock *tb1, *tb2;
884 /* remove the TB from the hash list */
885 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
886 h = tb_phys_hash_func(phys_pc);
887 tb_remove(&tb_phys_hash[h], tb,
888 offsetof(TranslationBlock, phys_hash_next));
890 /* remove the TB from the page list */
891 if (tb->page_addr[0] != page_addr) {
892 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
893 tb_page_remove(&p->first_tb, tb);
894 invalidate_page_bitmap(p);
896 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
897 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
898 tb_page_remove(&p->first_tb, tb);
899 invalidate_page_bitmap(p);
902 tb_invalidated_flag = 1;
904 /* remove the TB from the hash list */
905 h = tb_jmp_cache_hash_func(tb->pc);
906 for(env = first_cpu; env != NULL; env = env->next_cpu) {
907 if (env->tb_jmp_cache[h] == tb)
908 env->tb_jmp_cache[h] = NULL;
911 /* suppress this TB from the two jump lists */
912 tb_jmp_remove(tb, 0);
913 tb_jmp_remove(tb, 1);
915 /* suppress any remaining jumps to this TB */
916 tb1 = tb->jmp_first;
917 for(;;) {
918 n1 = (long)tb1 & 3;
919 if (n1 == 2)
920 break;
921 tb1 = (TranslationBlock *)((long)tb1 & ~3);
922 tb2 = tb1->jmp_next[n1];
923 tb_reset_jump(tb1, n1);
924 tb1->jmp_next[n1] = NULL;
925 tb1 = tb2;
927 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
929 tb_phys_invalidate_count++;
/* Set the 'len' consecutive bits starting at bit number 'start' in the
   bitmap 'tab'.  Bit k lives in byte k / 8 at bit position k % 8.
   Nothing is written when 'len' is 0. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *byte = tab + (start >> 3);
    /* bits from 'start' upwards within the first byte */
    const int head_mask = 0xff << (start & 7);
    /* bits below 'end' within the last byte */
    const int tail_mask = ~(0xff << (end & 7));

    if ((start & ~7) == (end & ~7)) {
        /* the whole run sits inside a single byte */
        if (start < end) {
            *byte |= head_mask & tail_mask;
        }
        return;
    }

    /* leading partial byte */
    *byte++ |= head_mask;
    /* whole bytes in the middle */
    for (start = (start + 8) & ~7; start < (end & ~7); start += 8) {
        *byte++ = 0xff;
    }
    /* trailing partial byte, if any */
    if (start < end) {
        *byte |= tail_mask;
    }
}
959 static void build_page_bitmap(PageDesc *p)
961 int n, tb_start, tb_end;
962 TranslationBlock *tb;
964 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
966 tb = p->first_tb;
967 while (tb != NULL) {
968 n = (long)tb & 3;
969 tb = (TranslationBlock *)((long)tb & ~3);
970 /* NOTE: this is subtle as a TB may span two physical pages */
971 if (n == 0) {
972 /* NOTE: tb_end may be after the end of the page, but
973 it is not a problem */
974 tb_start = tb->pc & ~TARGET_PAGE_MASK;
975 tb_end = tb_start + tb->size;
976 if (tb_end > TARGET_PAGE_SIZE)
977 tb_end = TARGET_PAGE_SIZE;
978 } else {
979 tb_start = 0;
980 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
982 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
983 tb = tb->page_next[n];
987 TranslationBlock *tb_gen_code(CPUState *env,
988 target_ulong pc, target_ulong cs_base,
989 int flags, int cflags)
991 TranslationBlock *tb;
992 uint8_t *tc_ptr;
993 tb_page_addr_t phys_pc, phys_page2;
994 target_ulong virt_page2;
995 int code_gen_size;
997 phys_pc = get_page_addr_code(env, pc);
998 tb = tb_alloc(pc);
999 if (!tb) {
1000 /* flush must be done */
1001 tb_flush(env);
1002 /* cannot fail at this point */
1003 tb = tb_alloc(pc);
1004 /* Don't forget to invalidate previous TB info. */
1005 tb_invalidated_flag = 1;
1007 tc_ptr = code_gen_ptr;
1008 tb->tc_ptr = tc_ptr;
1009 tb->cs_base = cs_base;
1010 tb->flags = flags;
1011 tb->cflags = cflags;
1012 cpu_gen_code(env, tb, &code_gen_size);
1013 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1015 /* check next page if needed */
1016 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1017 phys_page2 = -1;
1018 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1019 phys_page2 = get_page_addr_code(env, virt_page2);
1021 tb_link_page(tb, phys_pc, phys_page2);
1022 return tb;
1025 /* invalidate all TBs which intersect with the target physical page
1026 starting in range [start;end[. NOTE: start and end must refer to
1027 the same physical page. 'is_cpu_write_access' should be true if called
1028 from a real cpu write access: the virtual CPU will exit the current
1029 TB if code is modified inside this TB. */
1030 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1031 int is_cpu_write_access)
1033 TranslationBlock *tb, *tb_next, *saved_tb;
1034 CPUState *env = cpu_single_env;
1035 tb_page_addr_t tb_start, tb_end;
1036 PageDesc *p;
1037 int n;
1038 #ifdef TARGET_HAS_PRECISE_SMC
1039 int current_tb_not_found = is_cpu_write_access;
1040 TranslationBlock *current_tb = NULL;
1041 int current_tb_modified = 0;
1042 target_ulong current_pc = 0;
1043 target_ulong current_cs_base = 0;
1044 int current_flags = 0;
1045 #endif /* TARGET_HAS_PRECISE_SMC */
1047 p = page_find(start >> TARGET_PAGE_BITS);
1048 if (!p)
1049 return;
1050 if (!p->code_bitmap &&
1051 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1052 is_cpu_write_access) {
1053 /* build code bitmap */
1054 build_page_bitmap(p);
1057 /* we remove all the TBs in the range [start, end[ */
1058 /* XXX: see if in some cases it could be faster to invalidate all the code */
1059 tb = p->first_tb;
1060 while (tb != NULL) {
1061 n = (long)tb & 3;
1062 tb = (TranslationBlock *)((long)tb & ~3);
1063 tb_next = tb->page_next[n];
1064 /* NOTE: this is subtle as a TB may span two physical pages */
1065 if (n == 0) {
1066 /* NOTE: tb_end may be after the end of the page, but
1067 it is not a problem */
1068 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1069 tb_end = tb_start + tb->size;
1070 } else {
1071 tb_start = tb->page_addr[1];
1072 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1074 if (!(tb_end <= start || tb_start >= end)) {
1075 #ifdef TARGET_HAS_PRECISE_SMC
1076 if (current_tb_not_found) {
1077 current_tb_not_found = 0;
1078 current_tb = NULL;
1079 if (env->mem_io_pc) {
1080 /* now we have a real cpu fault */
1081 current_tb = tb_find_pc(env->mem_io_pc);
1084 if (current_tb == tb &&
1085 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1086 /* If we are modifying the current TB, we must stop
1087 its execution. We could be more precise by checking
1088 that the modification is after the current PC, but it
1089 would require a specialized function to partially
1090 restore the CPU state */
1092 current_tb_modified = 1;
1093 cpu_restore_state(current_tb, env, env->mem_io_pc);
1094 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1095 &current_flags);
1097 #endif /* TARGET_HAS_PRECISE_SMC */
1098 /* we need to do that to handle the case where a signal
1099 occurs while doing tb_phys_invalidate() */
1100 saved_tb = NULL;
1101 if (env) {
1102 saved_tb = env->current_tb;
1103 env->current_tb = NULL;
1105 tb_phys_invalidate(tb, -1);
1106 if (env) {
1107 env->current_tb = saved_tb;
1108 if (env->interrupt_request && env->current_tb)
1109 cpu_interrupt(env, env->interrupt_request);
1112 tb = tb_next;
1114 #if !defined(CONFIG_USER_ONLY)
1115 /* if no code remaining, no need to continue to use slow writes */
1116 if (!p->first_tb) {
1117 invalidate_page_bitmap(p);
1118 if (is_cpu_write_access) {
1119 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1122 #endif
1123 #ifdef TARGET_HAS_PRECISE_SMC
1124 if (current_tb_modified) {
1125 /* we generate a block containing just the instruction
1126 modifying the memory. It will ensure that it cannot modify
1127 itself */
1128 env->current_tb = NULL;
1129 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1130 cpu_resume_from_signal(env, NULL);
1132 #endif
1135 /* len must be <= 8 and start must be a multiple of len */
1136 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1138 PageDesc *p;
1139 int offset, b;
1140 #if 0
1141 if (1) {
1142 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1143 cpu_single_env->mem_io_vaddr, len,
1144 cpu_single_env->eip,
1145 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1147 #endif
1148 p = page_find(start >> TARGET_PAGE_BITS);
1149 if (!p)
1150 return;
1151 if (p->code_bitmap) {
1152 offset = start & ~TARGET_PAGE_MASK;
1153 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1154 if (b & ((1 << len) - 1))
1155 goto do_invalidate;
1156 } else {
1157 do_invalidate:
1158 tb_invalidate_phys_page_range(start, start + len, 1);
1162 #if !defined(CONFIG_SOFTMMU)
1163 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1164 unsigned long pc, void *puc)
1166 TranslationBlock *tb;
1167 PageDesc *p;
1168 int n;
1169 #ifdef TARGET_HAS_PRECISE_SMC
1170 TranslationBlock *current_tb = NULL;
1171 CPUState *env = cpu_single_env;
1172 int current_tb_modified = 0;
1173 target_ulong current_pc = 0;
1174 target_ulong current_cs_base = 0;
1175 int current_flags = 0;
1176 #endif
1178 addr &= TARGET_PAGE_MASK;
1179 p = page_find(addr >> TARGET_PAGE_BITS);
1180 if (!p)
1181 return;
1182 tb = p->first_tb;
1183 #ifdef TARGET_HAS_PRECISE_SMC
1184 if (tb && pc != 0) {
1185 current_tb = tb_find_pc(pc);
1187 #endif
1188 while (tb != NULL) {
1189 n = (long)tb & 3;
1190 tb = (TranslationBlock *)((long)tb & ~3);
1191 #ifdef TARGET_HAS_PRECISE_SMC
1192 if (current_tb == tb &&
1193 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1194 /* If we are modifying the current TB, we must stop
1195 its execution. We could be more precise by checking
1196 that the modification is after the current PC, but it
1197 would require a specialized function to partially
1198 restore the CPU state */
1200 current_tb_modified = 1;
1201 cpu_restore_state(current_tb, env, pc);
1202 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1203 &current_flags);
1205 #endif /* TARGET_HAS_PRECISE_SMC */
1206 tb_phys_invalidate(tb, addr);
1207 tb = tb->page_next[n];
1209 p->first_tb = NULL;
1210 #ifdef TARGET_HAS_PRECISE_SMC
1211 if (current_tb_modified) {
1212 /* we generate a block containing just the instruction
1213 modifying the memory. It will ensure that it cannot modify
1214 itself */
1215 env->current_tb = NULL;
1216 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1217 cpu_resume_from_signal(env, puc);
1219 #endif
1221 #endif
1223 /* add the tb in the target page and protect it if necessary */
1224 static inline void tb_alloc_page(TranslationBlock *tb,
1225 unsigned int n, tb_page_addr_t page_addr)
1227 PageDesc *p;
1228 #ifndef CONFIG_USER_ONLY
1229 bool page_already_protected;
1230 #endif
1232 tb->page_addr[n] = page_addr;
1233 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1234 tb->page_next[n] = p->first_tb;
1235 #ifndef CONFIG_USER_ONLY
1236 page_already_protected = p->first_tb != NULL;
1237 #endif
1238 p->first_tb = (TranslationBlock *)((long)tb | n);
1239 invalidate_page_bitmap(p);
1241 #if defined(TARGET_HAS_SMC) || 1
1243 #if defined(CONFIG_USER_ONLY)
1244 if (p->flags & PAGE_WRITE) {
1245 target_ulong addr;
1246 PageDesc *p2;
1247 int prot;
1249 /* force the host page as non writable (writes will have a
1250 page fault + mprotect overhead) */
1251 page_addr &= qemu_host_page_mask;
1252 prot = 0;
1253 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1254 addr += TARGET_PAGE_SIZE) {
1256 p2 = page_find (addr >> TARGET_PAGE_BITS);
1257 if (!p2)
1258 continue;
1259 prot |= p2->flags;
1260 p2->flags &= ~PAGE_WRITE;
1262 mprotect(g2h(page_addr), qemu_host_page_size,
1263 (prot & PAGE_BITS) & ~PAGE_WRITE);
1264 #ifdef DEBUG_TB_INVALIDATE
1265 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1266 page_addr);
1267 #endif
1269 #else
1270 /* if some code is already present, then the pages are already
1271 protected. So we handle the case where only the first TB is
1272 allocated in a physical page */
1273 if (!page_already_protected) {
1274 tlb_protect_code(page_addr);
1276 #endif
1278 #endif /* TARGET_HAS_SMC */
1281 /* add a new TB and link it to the physical page tables. phys_page2 is
1282 (-1) to indicate that only one page contains the TB. */
1283 void tb_link_page(TranslationBlock *tb,
1284 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1286 unsigned int h;
1287 TranslationBlock **ptb;
1289 /* Grab the mmap lock to stop another thread invalidating this TB
1290 before we are done. */
1291 mmap_lock();
1292 /* add in the physical hash table */
1293 h = tb_phys_hash_func(phys_pc);
1294 ptb = &tb_phys_hash[h];
1295 tb->phys_hash_next = *ptb;
1296 *ptb = tb;
1298 /* add in the page list */
1299 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1300 if (phys_page2 != -1)
1301 tb_alloc_page(tb, 1, phys_page2);
1302 else
1303 tb->page_addr[1] = -1;
1305 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1306 tb->jmp_next[0] = NULL;
1307 tb->jmp_next[1] = NULL;
1309 /* init original jump addresses */
1310 if (tb->tb_next_offset[0] != 0xffff)
1311 tb_reset_jump(tb, 0);
1312 if (tb->tb_next_offset[1] != 0xffff)
1313 tb_reset_jump(tb, 1);
1315 #ifdef DEBUG_TB_CHECK
1316 tb_page_check();
1317 #endif
1318 mmap_unlock();
1321 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1322 tb[1].tc_ptr. Return NULL if not found */
1323 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1325 int m_min, m_max, m;
1326 unsigned long v;
1327 TranslationBlock *tb;
1329 if (nb_tbs <= 0)
1330 return NULL;
1331 if (tc_ptr < (unsigned long)code_gen_buffer ||
1332 tc_ptr >= (unsigned long)code_gen_ptr)
1333 return NULL;
1334 /* binary search (cf Knuth) */
1335 m_min = 0;
1336 m_max = nb_tbs - 1;
1337 while (m_min <= m_max) {
1338 m = (m_min + m_max) >> 1;
1339 tb = &tbs[m];
1340 v = (unsigned long)tb->tc_ptr;
1341 if (v == tc_ptr)
1342 return tb;
1343 else if (tc_ptr < v) {
1344 m_max = m - 1;
1345 } else {
1346 m_min = m + 1;
1349 return &tbs[m_max];
1352 static void tb_reset_jump_recursive(TranslationBlock *tb);
1354 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1356 TranslationBlock *tb1, *tb_next, **ptb;
1357 unsigned int n1;
1359 tb1 = tb->jmp_next[n];
1360 if (tb1 != NULL) {
1361 /* find head of list */
1362 for(;;) {
1363 n1 = (long)tb1 & 3;
1364 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1365 if (n1 == 2)
1366 break;
1367 tb1 = tb1->jmp_next[n1];
1369 /* we are now sure now that tb jumps to tb1 */
1370 tb_next = tb1;
1372 /* remove tb from the jmp_first list */
1373 ptb = &tb_next->jmp_first;
1374 for(;;) {
1375 tb1 = *ptb;
1376 n1 = (long)tb1 & 3;
1377 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1378 if (n1 == n && tb1 == tb)
1379 break;
1380 ptb = &tb1->jmp_next[n1];
1382 *ptb = tb->jmp_next[n];
1383 tb->jmp_next[n] = NULL;
1385 /* suppress the jump to next tb in generated code */
1386 tb_reset_jump(tb, n);
1388 /* suppress jumps in the tb on which we could have jumped */
1389 tb_reset_jump_recursive(tb_next);
1393 static void tb_reset_jump_recursive(TranslationBlock *tb)
1395 tb_reset_jump_recursive2(tb, 0);
1396 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate translated code covering the single byte at 'pc'. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Invalidate translated code covering the byte at guest virtual address
 * 'pc': resolve it to a physical page, combine that page's phys_offset
 * with the in-page offset of 'pc' and invalidate that one-byte range.
 */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    target_phys_addr_t phys = cpu_get_phys_page_debug(env, pc);
    PhysPageDesc *desc = phys_page_find(phys >> TARGET_PAGE_BITS);
    target_ulong pd;
    ram_addr_t ram_addr;

    if (desc != NULL) {
        pd = desc->phys_offset;
    } else {
        pd = IO_MEM_UNASSIGNED;
    }
    ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1426 #if defined(CONFIG_USER_ONLY)
1427 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1432 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1433 int flags, CPUWatchpoint **watchpoint)
1435 return -ENOSYS;
1437 #else
1438 /* Add a watchpoint. */
1439 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1440 int flags, CPUWatchpoint **watchpoint)
1442 target_ulong len_mask = ~(len - 1);
1443 CPUWatchpoint *wp;
1445 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1446 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1447 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1448 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1449 return -EINVAL;
1451 wp = qemu_malloc(sizeof(*wp));
1453 wp->vaddr = addr;
1454 wp->len_mask = len_mask;
1455 wp->flags = flags;
1457 /* keep all GDB-injected watchpoints in front */
1458 if (flags & BP_GDB)
1459 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1460 else
1461 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1463 tlb_flush_page(env, addr);
1465 if (watchpoint)
1466 *watchpoint = wp;
1467 return 0;
1470 /* Remove a specific watchpoint. */
1471 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1472 int flags)
1474 target_ulong len_mask = ~(len - 1);
1475 CPUWatchpoint *wp;
1477 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1478 if (addr == wp->vaddr && len_mask == wp->len_mask
1479 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1480 cpu_watchpoint_remove_by_ref(env, wp);
1481 return 0;
1484 return -ENOENT;
1487 /* Remove a specific watchpoint by reference. */
1488 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1490 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1492 tlb_flush_page(env, watchpoint->vaddr);
1494 qemu_free(watchpoint);
1497 /* Remove all matching watchpoints. */
1498 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1500 CPUWatchpoint *wp, *next;
1502 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1503 if (wp->flags & mask)
1504 cpu_watchpoint_remove_by_ref(env, wp);
1507 #endif
1509 /* Add a breakpoint. */
1510 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1511 CPUBreakpoint **breakpoint)
1513 #if defined(TARGET_HAS_ICE)
1514 CPUBreakpoint *bp;
1516 bp = qemu_malloc(sizeof(*bp));
1518 bp->pc = pc;
1519 bp->flags = flags;
1521 /* keep all GDB-injected breakpoints in front */
1522 if (flags & BP_GDB)
1523 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1524 else
1525 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1527 breakpoint_invalidate(env, pc);
1529 if (breakpoint)
1530 *breakpoint = bp;
1531 return 0;
1532 #else
1533 return -ENOSYS;
1534 #endif
1537 /* Remove a specific breakpoint. */
1538 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1540 #if defined(TARGET_HAS_ICE)
1541 CPUBreakpoint *bp;
1543 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1544 if (bp->pc == pc && bp->flags == flags) {
1545 cpu_breakpoint_remove_by_ref(env, bp);
1546 return 0;
1549 return -ENOENT;
1550 #else
1551 return -ENOSYS;
1552 #endif
1555 /* Remove a specific breakpoint by reference. */
1556 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1558 #if defined(TARGET_HAS_ICE)
1559 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1561 breakpoint_invalidate(env, breakpoint->pc);
1563 qemu_free(breakpoint);
1564 #endif
1567 /* Remove all matching breakpoints. */
1568 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1570 #if defined(TARGET_HAS_ICE)
1571 CPUBreakpoint *bp, *next;
1573 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1574 if (bp->flags & mask)
1575 cpu_breakpoint_remove_by_ref(env, bp);
1577 #endif
1580 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1581 CPU loop after each instruction */
1582 void cpu_single_step(CPUState *env, int enabled)
1584 #if defined(TARGET_HAS_ICE)
1585 if (env->singlestep_enabled != enabled) {
1586 env->singlestep_enabled = enabled;
1587 if (kvm_enabled())
1588 kvm_update_guest_debug(env, 0);
1589 else {
1590 /* must flush all the translated code to avoid inconsistencies */
1591 /* XXX: only flush what is necessary */
1592 tb_flush(env);
1595 #endif
1598 /* enable or disable low levels log */
1599 void cpu_set_log(int log_flags)
1601 loglevel = log_flags;
1602 if (loglevel && !logfile) {
1603 logfile = fopen(logfilename, log_append ? "a" : "w");
1604 if (!logfile) {
1605 perror(logfilename);
1606 _exit(1);
1608 #if !defined(CONFIG_SOFTMMU)
1609 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1611 static char logfile_buf[4096];
1612 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1614 #elif !defined(_WIN32)
1615 /* Win32 doesn't support line-buffering and requires size >= 2 */
1616 setvbuf(logfile, NULL, _IOLBF, 0);
1617 #endif
1618 log_append = 1;
1620 if (!loglevel && logfile) {
1621 fclose(logfile);
1622 logfile = NULL;
1626 void cpu_set_log_filename(const char *filename)
1628 logfilename = strdup(filename);
1629 if (logfile) {
1630 fclose(logfile);
1631 logfile = NULL;
1633 cpu_set_log(loglevel);
1636 static void cpu_unlink_tb(CPUState *env)
1638 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1639 problem and hope the cpu will stop of its own accord. For userspace
1640 emulation this often isn't actually as bad as it sounds. Often
1641 signals are used primarily to interrupt blocking syscalls. */
1642 TranslationBlock *tb;
1643 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1645 spin_lock(&interrupt_lock);
1646 tb = env->current_tb;
1647 /* if the cpu is currently executing code, we must unlink it and
1648 all the potentially executing TB */
1649 if (tb) {
1650 env->current_tb = NULL;
1651 tb_reset_jump_recursive(tb);
1653 spin_unlock(&interrupt_lock);
1656 #ifndef CONFIG_USER_ONLY
1657 /* mask must never be zero, except for A20 change call */
1658 static void tcg_handle_interrupt(CPUState *env, int mask)
1660 int old_mask;
1662 old_mask = env->interrupt_request;
1663 env->interrupt_request |= mask;
1666 * If called from iothread context, wake the target cpu in
1667 * case its halted.
1669 if (!qemu_cpu_is_self(env)) {
1670 qemu_cpu_kick(env);
1671 return;
1674 if (use_icount) {
1675 env->icount_decr.u16.high = 0xffff;
1676 if (!can_do_io(env)
1677 && (mask & ~old_mask) != 0) {
1678 cpu_abort(env, "Raised interrupt while not in I/O function");
1680 } else {
1681 cpu_unlink_tb(env);
1685 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1687 #else /* CONFIG_USER_ONLY */
1689 void cpu_interrupt(CPUState *env, int mask)
1691 env->interrupt_request |= mask;
1692 cpu_unlink_tb(env);
1694 #endif /* CONFIG_USER_ONLY */
1696 void cpu_reset_interrupt(CPUState *env, int mask)
1698 env->interrupt_request &= ~mask;
1701 void cpu_exit(CPUState *env)
1703 env->exit_request = 1;
1704 cpu_unlink_tb(env);
1707 const CPULogItem cpu_log_items[] = {
1708 { CPU_LOG_TB_OUT_ASM, "out_asm",
1709 "show generated host assembly code for each compiled TB" },
1710 { CPU_LOG_TB_IN_ASM, "in_asm",
1711 "show target assembly code for each compiled TB" },
1712 { CPU_LOG_TB_OP, "op",
1713 "show micro ops for each compiled TB" },
1714 { CPU_LOG_TB_OP_OPT, "op_opt",
1715 "show micro ops "
1716 #ifdef TARGET_I386
1717 "before eflags optimization and "
1718 #endif
1719 "after liveness analysis" },
1720 { CPU_LOG_INT, "int",
1721 "show interrupts/exceptions in short format" },
1722 { CPU_LOG_EXEC, "exec",
1723 "show trace before each executed TB (lots of logs)" },
1724 { CPU_LOG_TB_CPU, "cpu",
1725 "show CPU state before block translation" },
1726 #ifdef TARGET_I386
1727 { CPU_LOG_PCALL, "pcall",
1728 "show protected mode far calls/returns/exceptions" },
1729 { CPU_LOG_RESET, "cpu_reset",
1730 "show CPU state before CPU resets" },
1731 #endif
1732 #ifdef DEBUG_IOPORT
1733 { CPU_LOG_IOPORT, "ioport",
1734 "show all i/o ports accesses" },
1735 #endif
1736 { 0, NULL, NULL },
1739 #ifndef CONFIG_USER_ONLY
1740 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1741 = QLIST_HEAD_INITIALIZER(memory_client_list);
1743 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1744 ram_addr_t size,
1745 ram_addr_t phys_offset,
1746 bool log_dirty)
1748 CPUPhysMemoryClient *client;
1749 QLIST_FOREACH(client, &memory_client_list, list) {
1750 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1754 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1755 target_phys_addr_t end)
1757 CPUPhysMemoryClient *client;
1758 QLIST_FOREACH(client, &memory_client_list, list) {
1759 int r = client->sync_dirty_bitmap(client, start, end);
1760 if (r < 0)
1761 return r;
1763 return 0;
1766 static int cpu_notify_migration_log(int enable)
1768 CPUPhysMemoryClient *client;
1769 QLIST_FOREACH(client, &memory_client_list, list) {
1770 int r = client->migration_log(client, enable);
1771 if (r < 0)
1772 return r;
1774 return 0;
1777 struct last_map {
1778 target_phys_addr_t start_addr;
1779 ram_addr_t size;
1780 ram_addr_t phys_offset;
1783 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1784 * address. Each intermediate table provides the next L2_BITs of guest
1785 * physical address space. The number of levels vary based on host and
1786 * guest configuration, making it efficient to build the final guest
1787 * physical address by seeding the L1 offset and shifting and adding in
1788 * each L2 offset as we recurse through them. */
1789 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1790 void **lp, target_phys_addr_t addr,
1791 struct last_map *map)
1793 int i;
1795 if (*lp == NULL) {
1796 return;
1798 if (level == 0) {
1799 PhysPageDesc *pd = *lp;
1800 addr <<= L2_BITS + TARGET_PAGE_BITS;
1801 for (i = 0; i < L2_SIZE; ++i) {
1802 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1803 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1805 if (map->size &&
1806 start_addr == map->start_addr + map->size &&
1807 pd[i].phys_offset == map->phys_offset + map->size) {
1809 map->size += TARGET_PAGE_SIZE;
1810 continue;
1811 } else if (map->size) {
1812 client->set_memory(client, map->start_addr,
1813 map->size, map->phys_offset, false);
1816 map->start_addr = start_addr;
1817 map->size = TARGET_PAGE_SIZE;
1818 map->phys_offset = pd[i].phys_offset;
1821 } else {
1822 void **pp = *lp;
1823 for (i = 0; i < L2_SIZE; ++i) {
1824 phys_page_for_each_1(client, level - 1, pp + i,
1825 (addr << L2_BITS) | i, map);
1830 static void phys_page_for_each(CPUPhysMemoryClient *client)
1832 int i;
1833 struct last_map map = { };
1835 for (i = 0; i < P_L1_SIZE; ++i) {
1836 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1837 l1_phys_map + i, i, &map);
1839 if (map.size) {
1840 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1841 false);
1845 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1847 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1848 phys_page_for_each(client);
1851 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1853 QLIST_REMOVE(client, list);
1855 #endif
/*
 * Return non-zero iff the first 'n' bytes of s1 equal the whole of the
 * NUL-terminated string s2 (i.e. s2 is exactly n bytes long and matches).
 */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1864 /* takes a comma separated list of log masks. Return 0 if error. */
1865 int cpu_str_to_log_mask(const char *str)
1867 const CPULogItem *item;
1868 int mask;
1869 const char *p, *p1;
1871 p = str;
1872 mask = 0;
1873 for(;;) {
1874 p1 = strchr(p, ',');
1875 if (!p1)
1876 p1 = p + strlen(p);
1877 if(cmp1(p,p1-p,"all")) {
1878 for(item = cpu_log_items; item->mask != 0; item++) {
1879 mask |= item->mask;
1881 } else {
1882 for(item = cpu_log_items; item->mask != 0; item++) {
1883 if (cmp1(p, p1 - p, item->name))
1884 goto found;
1886 return 0;
1888 found:
1889 mask |= item->mask;
1890 if (*p1 != ',')
1891 break;
1892 p = p1 + 1;
1894 return mask;
1897 void cpu_abort(CPUState *env, const char *fmt, ...)
1899 va_list ap;
1900 va_list ap2;
1902 va_start(ap, fmt);
1903 va_copy(ap2, ap);
1904 fprintf(stderr, "qemu: fatal: ");
1905 vfprintf(stderr, fmt, ap);
1906 fprintf(stderr, "\n");
1907 #ifdef TARGET_I386
1908 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1909 #else
1910 cpu_dump_state(env, stderr, fprintf, 0);
1911 #endif
1912 if (qemu_log_enabled()) {
1913 qemu_log("qemu: fatal: ");
1914 qemu_log_vprintf(fmt, ap2);
1915 qemu_log("\n");
1916 #ifdef TARGET_I386
1917 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1918 #else
1919 log_cpu_state(env, 0);
1920 #endif
1921 qemu_log_flush();
1922 qemu_log_close();
1924 va_end(ap2);
1925 va_end(ap);
1926 #if defined(CONFIG_USER_ONLY)
1928 struct sigaction act;
1929 sigfillset(&act.sa_mask);
1930 act.sa_handler = SIG_DFL;
1931 sigaction(SIGABRT, &act, NULL);
1933 #endif
1934 abort();
1937 CPUState *cpu_copy(CPUState *env)
1939 CPUState *new_env = cpu_init(env->cpu_model_str);
1940 CPUState *next_cpu = new_env->next_cpu;
1941 int cpu_index = new_env->cpu_index;
1942 #if defined(TARGET_HAS_ICE)
1943 CPUBreakpoint *bp;
1944 CPUWatchpoint *wp;
1945 #endif
1947 memcpy(new_env, env, sizeof(CPUState));
1949 /* Preserve chaining and index. */
1950 new_env->next_cpu = next_cpu;
1951 new_env->cpu_index = cpu_index;
1953 /* Clone all break/watchpoints.
1954 Note: Once we support ptrace with hw-debug register access, make sure
1955 BP_CPU break/watchpoints are handled correctly on clone. */
1956 QTAILQ_INIT(&env->breakpoints);
1957 QTAILQ_INIT(&env->watchpoints);
1958 #if defined(TARGET_HAS_ICE)
1959 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1960 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1962 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1963 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1964 wp->flags, NULL);
1966 #endif
1968 return new_env;
1971 #if !defined(CONFIG_USER_ONLY)
1973 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1975 unsigned int i;
1977 /* Discard jump cache entries for any tb which might potentially
1978 overlap the flushed page. */
1979 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1980 memset (&env->tb_jmp_cache[i], 0,
1981 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1983 i = tb_jmp_cache_hash_page(addr);
1984 memset (&env->tb_jmp_cache[i], 0,
1985 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1988 static CPUTLBEntry s_cputlb_empty_entry = {
1989 .addr_read = -1,
1990 .addr_write = -1,
1991 .addr_code = -1,
1992 .addend = -1,
1995 /* NOTE: if flush_global is true, also flush global entries (not
1996 implemented yet) */
1997 void tlb_flush(CPUState *env, int flush_global)
1999 int i;
2001 #if defined(DEBUG_TLB)
2002 printf("tlb_flush:\n");
2003 #endif
2004 /* must reset current TB so that interrupts cannot modify the
2005 links while we are modifying them */
2006 env->current_tb = NULL;
2008 for(i = 0; i < CPU_TLB_SIZE; i++) {
2009 int mmu_idx;
2010 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2011 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2015 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2017 env->tlb_flush_addr = -1;
2018 env->tlb_flush_mask = 0;
2019 tlb_flush_count++;
2022 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2024 if (addr == (tlb_entry->addr_read &
2025 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2026 addr == (tlb_entry->addr_write &
2027 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2028 addr == (tlb_entry->addr_code &
2029 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2030 *tlb_entry = s_cputlb_empty_entry;
2034 void tlb_flush_page(CPUState *env, target_ulong addr)
2036 int i;
2037 int mmu_idx;
2039 #if defined(DEBUG_TLB)
2040 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2041 #endif
2042 /* Check if we need to flush due to large pages. */
2043 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2044 #if defined(DEBUG_TLB)
2045 printf("tlb_flush_page: forced full flush ("
2046 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2047 env->tlb_flush_addr, env->tlb_flush_mask);
2048 #endif
2049 tlb_flush(env, 1);
2050 return;
2052 /* must reset current TB so that interrupts cannot modify the
2053 links while we are modifying them */
2054 env->current_tb = NULL;
2056 addr &= TARGET_PAGE_MASK;
2057 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2058 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2059 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2061 tlb_flush_jmp_cache(env, addr);
2064 /* update the TLBs so that writes to code in the virtual page 'addr'
2065 can be detected */
2066 static void tlb_protect_code(ram_addr_t ram_addr)
2068 cpu_physical_memory_reset_dirty(ram_addr,
2069 ram_addr + TARGET_PAGE_SIZE,
2070 CODE_DIRTY_FLAG);
2073 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2074 tested for self modifying code */
2075 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2076 target_ulong vaddr)
2078 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2081 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2082 unsigned long start, unsigned long length)
2084 unsigned long addr;
2085 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2086 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2087 if ((addr - start) < length) {
2088 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2093 /* Note: start and end must be within the same ram block. */
2094 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2095 int dirty_flags)
2097 CPUState *env;
2098 unsigned long length, start1;
2099 int i;
2101 start &= TARGET_PAGE_MASK;
2102 end = TARGET_PAGE_ALIGN(end);
2104 length = end - start;
2105 if (length == 0)
2106 return;
2107 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2109 /* we modify the TLB cache so that the dirty bit will be set again
2110 when accessing the range */
2111 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2112 /* Check that we don't span multiple blocks - this breaks the
2113 address comparisons below. */
2114 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2115 != (end - 1) - start) {
2116 abort();
2119 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2120 int mmu_idx;
2121 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2122 for(i = 0; i < CPU_TLB_SIZE; i++)
2123 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2124 start1, length);
2129 int cpu_physical_memory_set_dirty_tracking(int enable)
2131 int ret = 0;
2132 in_migration = enable;
2133 ret = cpu_notify_migration_log(!!enable);
2134 return ret;
2137 int cpu_physical_memory_get_dirty_tracking(void)
2139 return in_migration;
2142 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2143 target_phys_addr_t end_addr)
2145 int ret;
2147 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2148 return ret;
2151 int cpu_physical_log_start(target_phys_addr_t start_addr,
2152 ram_addr_t size)
2154 CPUPhysMemoryClient *client;
2155 QLIST_FOREACH(client, &memory_client_list, list) {
2156 if (client->log_start) {
2157 int r = client->log_start(client, start_addr, size);
2158 if (r < 0) {
2159 return r;
2163 return 0;
2166 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2167 ram_addr_t size)
2169 CPUPhysMemoryClient *client;
2170 QLIST_FOREACH(client, &memory_client_list, list) {
2171 if (client->log_stop) {
2172 int r = client->log_stop(client, start_addr, size);
2173 if (r < 0) {
2174 return r;
2178 return 0;
2181 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2183 ram_addr_t ram_addr;
2184 void *p;
2186 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2187 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2188 + tlb_entry->addend);
2189 ram_addr = qemu_ram_addr_from_host_nofail(p);
2190 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2191 tlb_entry->addr_write |= TLB_NOTDIRTY;
2196 /* update the TLB according to the current state of the dirty bits */
2197 void cpu_tlb_update_dirty(CPUState *env)
2199 int i;
2200 int mmu_idx;
2201 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2202 for(i = 0; i < CPU_TLB_SIZE; i++)
2203 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2207 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2209 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2210 tlb_entry->addr_write = vaddr;
2213 /* update the TLB corresponding to virtual page vaddr
2214 so that it is no longer dirty */
2215 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2217 int i;
2218 int mmu_idx;
2220 vaddr &= TARGET_PAGE_MASK;
2221 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2222 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2223 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2226 /* Our TLB does not support large pages, so remember the area covered by
2227 large pages and trigger a full TLB flush if these are invalidated. */
2228 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2229 target_ulong size)
2231 target_ulong mask = ~(size - 1);
2233 if (env->tlb_flush_addr == (target_ulong)-1) {
2234 env->tlb_flush_addr = vaddr & mask;
2235 env->tlb_flush_mask = mask;
2236 return;
2238 /* Extend the existing region to include the new page.
2239 This is a compromise between unnecessary flushes and the cost
2240 of maintaining a full variable size TLB. */
2241 mask &= env->tlb_flush_mask;
2242 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2243 mask <<= 1;
2245 env->tlb_flush_addr &= mask;
2246 env->tlb_flush_mask = mask;
2249 /* Add a new TLB entry. At most one entry for a given virtual address
2250 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2251 supplied size is only used by tlb_flush_page. */
2252 void tlb_set_page(CPUState *env, target_ulong vaddr,
2253 target_phys_addr_t paddr, int prot,
2254 int mmu_idx, target_ulong size)
2256 PhysPageDesc *p;
2257 unsigned long pd;
2258 unsigned int index;
2259 target_ulong address;
2260 target_ulong code_address;
2261 unsigned long addend;
2262 CPUTLBEntry *te;
2263 CPUWatchpoint *wp;
2264 target_phys_addr_t iotlb;
2266 assert(size >= TARGET_PAGE_SIZE);
2267 if (size != TARGET_PAGE_SIZE) {
2268 tlb_add_large_page(env, vaddr, size);
2270 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2271 if (!p) {
2272 pd = IO_MEM_UNASSIGNED;
2273 } else {
2274 pd = p->phys_offset;
2276 #if defined(DEBUG_TLB)
2277 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2278 " prot=%x idx=%d pd=0x%08lx\n",
2279 vaddr, paddr, prot, mmu_idx, pd);
2280 #endif
2282 address = vaddr;
2283 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2284 /* IO memory case (romd handled later) */
2285 address |= TLB_MMIO;
2287 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2288 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2289 /* Normal RAM. */
2290 iotlb = pd & TARGET_PAGE_MASK;
2291 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2292 iotlb |= IO_MEM_NOTDIRTY;
2293 else
2294 iotlb |= IO_MEM_ROM;
2295 } else {
2296 /* IO handlers are currently passed a physical address.
2297 It would be nice to pass an offset from the base address
2298 of that region. This would avoid having to special case RAM,
2299 and avoid full address decoding in every device.
2300 We can't use the high bits of pd for this because
2301 IO_MEM_ROMD uses these as a ram address. */
2302 iotlb = (pd & ~TARGET_PAGE_MASK);
2303 if (p) {
2304 iotlb += p->region_offset;
2305 } else {
2306 iotlb += paddr;
2310 code_address = address;
2311 /* Make accesses to pages with watchpoints go via the
2312 watchpoint trap routines. */
2313 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2314 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2315 /* Avoid trapping reads of pages with a write breakpoint. */
2316 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2317 iotlb = io_mem_watch + paddr;
2318 address |= TLB_MMIO;
2319 break;
2324 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2325 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2326 te = &env->tlb_table[mmu_idx][index];
2327 te->addend = addend - vaddr;
2328 if (prot & PAGE_READ) {
2329 te->addr_read = address;
2330 } else {
2331 te->addr_read = -1;
2334 if (prot & PAGE_EXEC) {
2335 te->addr_code = code_address;
2336 } else {
2337 te->addr_code = -1;
2339 if (prot & PAGE_WRITE) {
2340 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2341 (pd & IO_MEM_ROMD)) {
2342 /* Write access calls the I/O callback. */
2343 te->addr_write = address | TLB_MMIO;
2344 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2345 !cpu_physical_memory_is_dirty(pd)) {
2346 te->addr_write = address | TLB_NOTDIRTY;
2347 } else {
2348 te->addr_write = address;
2350 } else {
2351 te->addr_write = -1;
2355 #else
2357 void tlb_flush(CPUState *env, int flush_global)
2361 void tlb_flush_page(CPUState *env, target_ulong addr)
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */
2370 struct walk_memory_regions_data
2372 walk_memory_regions_fn fn;
2373 void *priv;
2374 unsigned long start;
2375 int prot;
2378 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2379 abi_ulong end, int new_prot)
2381 if (data->start != -1ul) {
2382 int rc = data->fn(data->priv, data->start, end, data->prot);
2383 if (rc != 0) {
2384 return rc;
2388 data->start = (new_prot ? end : -1ul);
2389 data->prot = new_prot;
2391 return 0;
2394 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2395 abi_ulong base, int level, void **lp)
2397 abi_ulong pa;
2398 int i, rc;
2400 if (*lp == NULL) {
2401 return walk_memory_regions_end(data, base, 0);
2404 if (level == 0) {
2405 PageDesc *pd = *lp;
2406 for (i = 0; i < L2_SIZE; ++i) {
2407 int prot = pd[i].flags;
2409 pa = base | (i << TARGET_PAGE_BITS);
2410 if (prot != data->prot) {
2411 rc = walk_memory_regions_end(data, pa, prot);
2412 if (rc != 0) {
2413 return rc;
2417 } else {
2418 void **pp = *lp;
2419 for (i = 0; i < L2_SIZE; ++i) {
2420 pa = base | ((abi_ulong)i <<
2421 (TARGET_PAGE_BITS + L2_BITS * level));
2422 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2423 if (rc != 0) {
2424 return rc;
2429 return 0;
2432 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2434 struct walk_memory_regions_data data;
2435 unsigned long i;
2437 data.fn = fn;
2438 data.priv = priv;
2439 data.start = -1ul;
2440 data.prot = 0;
2442 for (i = 0; i < V_L1_SIZE; i++) {
2443 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2444 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2445 if (rc != 0) {
2446 return rc;
2450 return walk_memory_regions_end(&data, 0, 0);
2453 static int dump_region(void *priv, abi_ulong start,
2454 abi_ulong end, unsigned long prot)
2456 FILE *f = (FILE *)priv;
2458 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2459 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2460 start, end, end - start,
2461 ((prot & PAGE_READ) ? 'r' : '-'),
2462 ((prot & PAGE_WRITE) ? 'w' : '-'),
2463 ((prot & PAGE_EXEC) ? 'x' : '-'));
2465 return (0);
2468 /* dump memory mappings */
2469 void page_dump(FILE *f)
2471 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2472 "start", "end", "size", "prot");
2473 walk_memory_regions(f, dump_region);
2476 int page_get_flags(target_ulong address)
2478 PageDesc *p;
2480 p = page_find(address >> TARGET_PAGE_BITS);
2481 if (!p)
2482 return 0;
2483 return p->flags;
2486 /* Modify the flags of a page and invalidate the code if necessary.
2487 The flag PAGE_WRITE_ORG is positioned automatically depending
2488 on PAGE_WRITE. The mmap_lock should already be held. */
2489 void page_set_flags(target_ulong start, target_ulong end, int flags)
2491 target_ulong addr, len;
2493 /* This function should never be called with addresses outside the
2494 guest address space. If this assert fires, it probably indicates
2495 a missing call to h2g_valid. */
2496 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2497 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2498 #endif
2499 assert(start < end);
2501 start = start & TARGET_PAGE_MASK;
2502 end = TARGET_PAGE_ALIGN(end);
2504 if (flags & PAGE_WRITE) {
2505 flags |= PAGE_WRITE_ORG;
2508 for (addr = start, len = end - start;
2509 len != 0;
2510 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2511 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2513 /* If the write protection bit is set, then we invalidate
2514 the code inside. */
2515 if (!(p->flags & PAGE_WRITE) &&
2516 (flags & PAGE_WRITE) &&
2517 p->first_tb) {
2518 tb_invalidate_phys_page(addr, 0, NULL);
2520 p->flags = flags;
2524 int page_check_range(target_ulong start, target_ulong len, int flags)
2526 PageDesc *p;
2527 target_ulong end;
2528 target_ulong addr;
2530 /* This function should never be called with addresses outside the
2531 guest address space. If this assert fires, it probably indicates
2532 a missing call to h2g_valid. */
2533 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2534 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2535 #endif
2537 if (len == 0) {
2538 return 0;
2540 if (start + len - 1 < start) {
2541 /* We've wrapped around. */
2542 return -1;
2545 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2546 start = start & TARGET_PAGE_MASK;
2548 for (addr = start, len = end - start;
2549 len != 0;
2550 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2551 p = page_find(addr >> TARGET_PAGE_BITS);
2552 if( !p )
2553 return -1;
2554 if( !(p->flags & PAGE_VALID) )
2555 return -1;
2557 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2558 return -1;
2559 if (flags & PAGE_WRITE) {
2560 if (!(p->flags & PAGE_WRITE_ORG))
2561 return -1;
2562 /* unprotect the page if it was put read-only because it
2563 contains translated code */
2564 if (!(p->flags & PAGE_WRITE)) {
2565 if (!page_unprotect(addr, 0, NULL))
2566 return -1;
2568 return 0;
2571 return 0;
2574 /* called from signal handler: invalidate the code and unprotect the
2575 page. Return TRUE if the fault was successfully handled. */
2576 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2578 unsigned int prot;
2579 PageDesc *p;
2580 target_ulong host_start, host_end, addr;
2582 /* Technically this isn't safe inside a signal handler. However we
2583 know this only ever happens in a synchronous SEGV handler, so in
2584 practice it seems to be ok. */
2585 mmap_lock();
2587 p = page_find(address >> TARGET_PAGE_BITS);
2588 if (!p) {
2589 mmap_unlock();
2590 return 0;
2593 /* if the page was really writable, then we change its
2594 protection back to writable */
2595 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2596 host_start = address & qemu_host_page_mask;
2597 host_end = host_start + qemu_host_page_size;
2599 prot = 0;
2600 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2601 p = page_find(addr >> TARGET_PAGE_BITS);
2602 p->flags |= PAGE_WRITE;
2603 prot |= p->flags;
2605 /* and since the content will be modified, we must invalidate
2606 the corresponding translated code. */
2607 tb_invalidate_phys_page(addr, pc, puc);
2608 #ifdef DEBUG_TB_CHECK
2609 tb_invalidate_check(addr);
2610 #endif
2612 mprotect((void *)g2h(host_start), qemu_host_page_size,
2613 prot & PAGE_BITS);
2615 mmap_unlock();
2616 return 1;
2618 mmap_unlock();
2619 return 0;
2622 static inline void tlb_set_dirty(CPUState *env,
2623 unsigned long addr, target_ulong vaddr)
2626 #endif /* defined(CONFIG_USER_ONLY) */
2628 #if !defined(CONFIG_USER_ONLY)
2630 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2631 typedef struct subpage_t {
2632 target_phys_addr_t base;
2633 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2634 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2635 } subpage_t;
2637 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2638 ram_addr_t memory, ram_addr_t region_offset);
2639 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2640 ram_addr_t orig_memory,
2641 ram_addr_t region_offset);
/* Work out which part of the page at 'addr' is covered by the region that
   starts at 'start_addr'.  Stores the first and last covered in-page
   offsets in start_addr2/end_addr2 and sets need_subpage to 1 when the
   region covers only part of the page.
   NOTE: the end_addr argument is not used by the expansion; the macro
   instead reads a variable named orig_size from the expansion site.  */
2642 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2643 need_subpage) \
2644 do { \
2645 if (addr > start_addr) \
2646 start_addr2 = 0; \
2647 else { \
2648 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2649 if (start_addr2 > 0) \
2650 need_subpage = 1; \
2653 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2654 end_addr2 = TARGET_PAGE_SIZE - 1; \
2655 else { \
2656 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2657 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2658 need_subpage = 1; \
2660 } while (0)
/* Map [start_addr, start_addr + size) to phys_offset, one target page at a
   time, creating subpage dispatchers for pages only partly covered, then
   flush the TLB of every CPU.  'size' must be non-zero (asserted).  */
2662 /* register physical memory.
2663 For RAM, 'size' must be a multiple of the target page size.
2664 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2665 io memory page. The address used when calling the IO function is
2666 the offset from the start of the region, plus region_offset. Both
2667 start_addr and region_offset are rounded down to a page boundary
2668 before calculating this offset. This should not be a problem unless
2669 the low bits of start_addr and region_offset differ. */
2670 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2671 ram_addr_t size,
2672 ram_addr_t phys_offset,
2673 ram_addr_t region_offset,
2674 bool log_dirty)
2676 target_phys_addr_t addr, end_addr;
2677 PhysPageDesc *p;
2678 CPUState *env;
/* The unrounded size is kept because CHECK_SUBPAGE reads orig_size. */
2679 ram_addr_t orig_size = size;
2680 subpage_t *subpage;
2682 assert(size);
2683 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
/* Unassigned regions use their own start address as region offset. */
2685 if (phys_offset == IO_MEM_UNASSIGNED) {
2686 region_offset = start_addr;
/* Round the offset down and the size up to whole target pages. */
2688 region_offset &= TARGET_PAGE_MASK;
2689 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2690 end_addr = start_addr + (target_phys_addr_t)size;
2692 addr = start_addr;
2693 do {
2694 p = phys_page_find(addr >> TARGET_PAGE_BITS);
/* Page already mapped to something: either split it into a subpage or
   overwrite the whole page mapping. */
2695 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2696 ram_addr_t orig_memory = p->phys_offset;
2697 target_phys_addr_t start_addr2, end_addr2;
2698 int need_subpage = 0;
2700 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2701 need_subpage);
2702 if (need_subpage) {
/* Reuse the existing subpage dispatcher if the page already has one. */
2703 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2704 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2705 &p->phys_offset, orig_memory,
2706 p->region_offset);
2707 } else {
2708 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2709 >> IO_MEM_SHIFT];
2711 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2712 region_offset);
2713 p->region_offset = 0;
2714 } else {
2715 p->phys_offset = phys_offset;
/* RAM/ROM/ROMD mappings advance through the backing memory page by page. */
2716 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2717 (phys_offset & IO_MEM_ROMD))
2718 phys_offset += TARGET_PAGE_SIZE;
/* Page not mapped yet (or unassigned): allocate its descriptor. */
2720 } else {
2721 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2722 p->phys_offset = phys_offset;
2723 p->region_offset = region_offset;
2724 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2725 (phys_offset & IO_MEM_ROMD)) {
2726 phys_offset += TARGET_PAGE_SIZE;
2727 } else {
2728 target_phys_addr_t start_addr2, end_addr2;
2729 int need_subpage = 0;
2731 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2732 end_addr2, need_subpage);
/* Partially covered I/O page: the rest of the page stays unassigned. */
2734 if (need_subpage) {
2735 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2736 &p->phys_offset, IO_MEM_UNASSIGNED,
2737 addr & TARGET_PAGE_MASK);
2738 subpage_register(subpage, start_addr2, end_addr2,
2739 phys_offset, region_offset);
2740 p->region_offset = 0;
2744 region_offset += TARGET_PAGE_SIZE;
2745 addr += TARGET_PAGE_SIZE;
2746 } while (addr != end_addr);
2748 /* since each CPU stores ram addresses in its TLB cache, we must
2749 reset the modified entries */
2750 /* XXX: slow ! */
2751 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2752 tlb_flush(env, 1);
2756 /* XXX: temporary until new memory mapping API */
2757 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2759 PhysPageDesc *p;
2761 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2762 if (!p)
2763 return IO_MEM_UNASSIGNED;
2764 return p->phys_offset;
2767 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2769 if (kvm_enabled())
2770 kvm_coalesce_mmio_region(addr, size);
2773 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2775 if (kvm_enabled())
2776 kvm_uncoalesce_mmio_region(addr, size);
/* Call kvm_flush_coalesced_mmio_buffer() when KVM is enabled; otherwise
   do nothing.  */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2785 #if defined(__linux__) && !defined(TARGET_S390X)
2787 #include <sys/vfs.h>
2789 #define HUGETLBFS_MAGIC 0x958458f6
2791 static long gethugepagesize(const char *path)
2793 struct statfs fs;
2794 int ret;
2796 do {
2797 ret = statfs(path, &fs);
2798 } while (ret != 0 && errno == EINTR);
2800 if (ret != 0) {
2801 perror(path);
2802 return 0;
2805 if (fs.f_type != HUGETLBFS_MAGIC)
2806 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2808 return fs.f_bsize;
2811 static void *file_ram_alloc(RAMBlock *block,
2812 ram_addr_t memory,
2813 const char *path)
2815 char *filename;
2816 void *area;
2817 int fd;
2818 #ifdef MAP_POPULATE
2819 int flags;
2820 #endif
2821 unsigned long hpagesize;
2823 hpagesize = gethugepagesize(path);
2824 if (!hpagesize) {
2825 return NULL;
2828 if (memory < hpagesize) {
2829 return NULL;
2832 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2833 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2834 return NULL;
2837 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2838 return NULL;
2841 fd = mkstemp(filename);
2842 if (fd < 0) {
2843 perror("unable to create backing store for hugepages");
2844 free(filename);
2845 return NULL;
2847 unlink(filename);
2848 free(filename);
2850 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2853 * ftruncate is not supported by hugetlbfs in older
2854 * hosts, so don't bother bailing out on errors.
2855 * If anything goes wrong with it under other filesystems,
2856 * mmap will fail.
2858 if (ftruncate(fd, memory))
2859 perror("ftruncate");
2861 #ifdef MAP_POPULATE
2862 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2863 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2864 * to sidestep this quirk.
2866 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2867 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2868 #else
2869 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2870 #endif
2871 if (area == MAP_FAILED) {
2872 perror("file_ram_alloc: can't mmap RAM pages");
2873 close(fd);
2874 return (NULL);
2876 block->fd = fd;
2877 return area;
2879 #endif
2881 static ram_addr_t find_ram_offset(ram_addr_t size)
2883 RAMBlock *block, *next_block;
2884 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2886 if (QLIST_EMPTY(&ram_list.blocks))
2887 return 0;
2889 QLIST_FOREACH(block, &ram_list.blocks, next) {
2890 ram_addr_t end, next = RAM_ADDR_MAX;
2892 end = block->offset + block->length;
2894 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2895 if (next_block->offset >= end) {
2896 next = MIN(next, next_block->offset);
2899 if (next - end >= size && next - end < mingap) {
2900 offset = end;
2901 mingap = next - end;
2904 return offset;
2907 static ram_addr_t last_ram_offset(void)
2909 RAMBlock *block;
2910 ram_addr_t last = 0;
2912 QLIST_FOREACH(block, &ram_list.blocks, next)
2913 last = MAX(last, block->offset + block->length);
2915 return last;
/* Create and register a new RAM block of 'size' bytes (rounded up to a
   target page).  The block id is the device path from the parent bus (when
   available) followed by 'name'; a duplicate id aborts.  When 'host' is
   non-NULL it is used as the backing memory and the block is flagged
   RAM_PREALLOC_MASK; otherwise backing memory is allocated here.
   Returns the block's offset in the ram address space.  */
2918 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2919 ram_addr_t size, void *host)
2921 RAMBlock *new_block, *block;
2923 size = TARGET_PAGE_ALIGN(size);
2924 new_block = qemu_mallocz(sizeof(*new_block));
/* Build the id string: optional "<dev path>/" prefix, then the name. */
2926 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2927 char *id = dev->parent_bus->info->get_dev_path(dev);
2928 if (id) {
2929 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2930 qemu_free(id);
2933 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
/* Block ids must be unique. */
2935 QLIST_FOREACH(block, &ram_list.blocks, next) {
2936 if (!strcmp(block->idstr, new_block->idstr)) {
2937 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2938 new_block->idstr);
2939 abort();
2943 new_block->offset = find_ram_offset(size);
2944 if (host) {
2945 new_block->host = host;
2946 new_block->flags |= RAM_PREALLOC_MASK;
2947 } else {
2948 if (mem_path) {
2949 #if defined (__linux__) && !defined(TARGET_S390X)
/* Try file-backed memory first, fall back to qemu_vmalloc(). */
2950 new_block->host = file_ram_alloc(new_block, size, mem_path);
2951 if (!new_block->host) {
2952 new_block->host = qemu_vmalloc(size);
2953 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2955 #else
2956 fprintf(stderr, "-mem-path option unsupported\n");
2957 exit(1);
2958 #endif
2959 } else {
2960 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2961 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2962 an system defined value, which is at least 256GB. Larger systems
2963 have larger values. We put the guest between the end of data
2964 segment (system break) and this value. We use 32GB as a base to
2965 have enough room for the system break to grow. */
2966 new_block->host = mmap((void*)0x800000000, size,
2967 PROT_EXEC|PROT_READ|PROT_WRITE,
2968 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2969 if (new_block->host == MAP_FAILED) {
2970 fprintf(stderr, "Allocating RAM failed\n");
2971 abort();
2973 #else
/* NOTE(review): in the Xen branch new_block->host is not assigned here. */
2974 if (xen_enabled()) {
2975 xen_ram_alloc(new_block->offset, size);
2976 } else {
2977 new_block->host = qemu_vmalloc(size);
2979 #endif
2980 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2983 new_block->length = size;
2985 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
/* Grow the dirty array to cover the new block and set the block's pages to 0xff. */
2987 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2988 last_ram_offset() >> TARGET_PAGE_BITS);
2989 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2990 0xff, size >> TARGET_PAGE_BITS);
2992 if (kvm_enabled())
2993 kvm_setup_guest_memory(new_block->host, size);
2995 return new_block->offset;
2998 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
3000 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
3003 void qemu_ram_free_from_ptr(ram_addr_t addr)
3005 RAMBlock *block;
3007 QLIST_FOREACH(block, &ram_list.blocks, next) {
3008 if (addr == block->offset) {
3009 QLIST_REMOVE(block, next);
3010 qemu_free(block);
3011 return;
/* Unlink and free the RAMBlock whose offset equals 'addr', releasing its
   host memory in the way that matches how it was obtained.  Does nothing
   if no block matches.  */
3016 void qemu_ram_free(ram_addr_t addr)
3018 RAMBlock *block;
3020 QLIST_FOREACH(block, &ram_list.blocks, next) {
3021 if (addr == block->offset) {
3022 QLIST_REMOVE(block, next);
/* Blocks flagged RAM_PREALLOC_MASK: host memory is not released here. */
3023 if (block->flags & RAM_PREALLOC_MASK) {
3025 } else if (mem_path) {
3026 #if defined (__linux__) && !defined(TARGET_S390X)
/* A non-zero fd selects the munmap/close path; fd 0 is treated as "no file". */
3027 if (block->fd) {
3028 munmap(block->host, block->length);
3029 close(block->fd);
3030 } else {
3031 qemu_vfree(block->host);
3033 #else
3034 abort();
3035 #endif
3036 } else {
3037 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3038 munmap(block->host, block->length);
3039 #else
3040 if (xen_enabled()) {
3041 xen_invalidate_map_cache_entry(block->host);
3042 } else {
3043 qemu_vfree(block->host);
3045 #endif
3047 qemu_free(block);
3048 return;
/* Replace 'length' bytes of host memory backing ram address 'addr' with a
   fresh mapping at the same host virtual address, using mapping flags that
   mirror the ones chosen at allocation time.  Blocks flagged
   RAM_PREALLOC_MASK are left alone.  Exits the process if the new mapping
   does not land at the expected address.  */
3054 #ifndef _WIN32
3055 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3057 RAMBlock *block;
3058 ram_addr_t offset;
3059 int flags;
3060 void *area, *vaddr;
3062 QLIST_FOREACH(block, &ram_list.blocks, next) {
/* Unsigned subtraction: also rejects addr below block->offset. */
3063 offset = addr - block->offset;
3064 if (offset < block->length) {
3065 vaddr = block->host + offset;
3066 if (block->flags & RAM_PREALLOC_MASK) {
3068 } else {
/* MAP_FIXED so the replacement lands exactly where the old pages were. */
3069 flags = MAP_FIXED;
3070 munmap(vaddr, length);
3071 if (mem_path) {
3072 #if defined(__linux__) && !defined(TARGET_S390X)
3073 if (block->fd) {
3074 #ifdef MAP_POPULATE
3075 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3076 MAP_PRIVATE;
3077 #else
3078 flags |= MAP_PRIVATE;
3079 #endif
3080 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3081 flags, block->fd, offset);
3082 } else {
3083 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3084 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3085 flags, -1, 0);
3087 #else
3088 abort();
3089 #endif
3090 } else {
3091 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3092 flags |= MAP_SHARED | MAP_ANONYMOUS;
3093 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3094 flags, -1, 0);
3095 #else
3096 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3097 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3098 flags, -1, 0);
3099 #endif
3101 if (area != vaddr) {
3102 fprintf(stderr, "Could not remap addr: "
3103 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3104 length, addr);
3105 exit(1);
3107 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3109 return;
3113 #endif /* !_WIN32 */
3115 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3116 With the exception of the softmmu code in this file, this should
3117 only be used for local memory (e.g. video ram) that the device owns,
3118 and knows it isn't going to access beyond the end of the block.
3120 It should not be used for general purpose DMA.
3121 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3123 void *qemu_get_ram_ptr(ram_addr_t addr)
3125 RAMBlock *block;
3127 QLIST_FOREACH(block, &ram_list.blocks, next) {
3128 if (addr - block->offset < block->length) {
3129 /* Move this entry to to start of the list. */
3130 if (block != QLIST_FIRST(&ram_list.blocks)) {
3131 QLIST_REMOVE(block, next);
3132 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3134 if (xen_enabled()) {
3135 /* We need to check if the requested address is in the RAM
3136 * because we don't want to map the entire memory in QEMU.
3137 * In that case just map until the end of the page.
3139 if (block->offset == 0) {
3140 return xen_map_cache(addr, 0, 0);
3141 } else if (block->host == NULL) {
3142 block->host =
3143 xen_map_cache(block->offset, block->length, 1);
3146 return block->host + (addr - block->offset);
3150 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3151 abort();
3153 return NULL;
3156 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3157 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3159 void *qemu_safe_ram_ptr(ram_addr_t addr)
3161 RAMBlock *block;
3163 QLIST_FOREACH(block, &ram_list.blocks, next) {
3164 if (addr - block->offset < block->length) {
3165 if (xen_enabled()) {
3166 /* We need to check if the requested address is in the RAM
3167 * because we don't want to map the entire memory in QEMU.
3168 * In that case just map until the end of the page.
3170 if (block->offset == 0) {
3171 return xen_map_cache(addr, 0, 0);
3172 } else if (block->host == NULL) {
3173 block->host =
3174 xen_map_cache(block->offset, block->length, 1);
3177 return block->host + (addr - block->offset);
3181 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3182 abort();
3184 return NULL;
3187 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3188 * but takes a size argument */
3189 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3191 if (*size == 0) {
3192 return NULL;
3194 if (xen_enabled()) {
3195 return xen_map_cache(addr, *size, 1);
3196 } else {
3197 RAMBlock *block;
3199 QLIST_FOREACH(block, &ram_list.blocks, next) {
3200 if (addr - block->offset < block->length) {
3201 if (addr - block->offset + *size > block->length)
3202 *size = block->length - addr + block->offset;
3203 return block->host + (addr - block->offset);
3207 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3208 abort();
/* Counterpart of the ram-pointer getters; only emits a trace event. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3217 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3219 RAMBlock *block;
3220 uint8_t *host = ptr;
3222 if (xen_enabled()) {
3223 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3224 return 0;
3227 QLIST_FOREACH(block, &ram_list.blocks, next) {
3228 /* This case append when the block is not mapped. */
3229 if (block->host == NULL) {
3230 continue;
3232 if (host - block->host < block->length) {
3233 *ram_addr = block->offset + (host - block->host);
3234 return 0;
3238 return -1;
3241 /* Some of the softmmu routines need to translate from a host pointer
3242 (typically a TLB entry) back to a ram offset. */
3243 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3245 ram_addr_t ram_addr;
3247 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3248 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3249 abort();
3251 return ram_addr;
3254 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3256 #ifdef DEBUG_UNASSIGNED
3257 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3258 #endif
3259 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3260 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3261 #endif
3262 return 0;
3265 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3267 #ifdef DEBUG_UNASSIGNED
3268 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3269 #endif
3270 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3271 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3272 #endif
3273 return 0;
3276 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3278 #ifdef DEBUG_UNASSIGNED
3279 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3280 #endif
3281 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3282 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3283 #endif
3284 return 0;
3287 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3289 #ifdef DEBUG_UNASSIGNED
3290 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3291 #endif
3292 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3293 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3294 #endif
3297 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3299 #ifdef DEBUG_UNASSIGNED
3300 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3301 #endif
3302 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3303 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3304 #endif
3307 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3309 #ifdef DEBUG_UNASSIGNED
3310 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3311 #endif
3312 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3313 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3314 #endif
3317 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3318 unassigned_mem_readb,
3319 unassigned_mem_readw,
3320 unassigned_mem_readl,
3323 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3324 unassigned_mem_writeb,
3325 unassigned_mem_writew,
3326 unassigned_mem_writel,
3329 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3330 uint32_t val)
3332 int dirty_flags;
3333 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3334 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3335 #if !defined(CONFIG_USER_ONLY)
3336 tb_invalidate_phys_page_fast(ram_addr, 1);
3337 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3338 #endif
3340 stb_p(qemu_get_ram_ptr(ram_addr), val);
3341 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3342 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3343 /* we remove the notdirty callback only if the code has been
3344 flushed */
3345 if (dirty_flags == 0xff)
3346 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3349 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3350 uint32_t val)
3352 int dirty_flags;
3353 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3354 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3355 #if !defined(CONFIG_USER_ONLY)
3356 tb_invalidate_phys_page_fast(ram_addr, 2);
3357 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3358 #endif
3360 stw_p(qemu_get_ram_ptr(ram_addr), val);
3361 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3362 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3363 /* we remove the notdirty callback only if the code has been
3364 flushed */
3365 if (dirty_flags == 0xff)
3366 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3369 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3370 uint32_t val)
3372 int dirty_flags;
3373 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3374 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3375 #if !defined(CONFIG_USER_ONLY)
3376 tb_invalidate_phys_page_fast(ram_addr, 4);
3377 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3378 #endif
3380 stl_p(qemu_get_ram_ptr(ram_addr), val);
3381 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3382 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3383 /* we remove the notdirty callback only if the code has been
3384 flushed */
3385 if (dirty_flags == 0xff)
3386 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3389 static CPUReadMemoryFunc * const error_mem_read[3] = {
3390 NULL, /* never used */
3391 NULL, /* never used */
3392 NULL, /* never used */
3395 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3396 notdirty_mem_writeb,
3397 notdirty_mem_writew,
3398 notdirty_mem_writel,
/* Check the access at in-page 'offset' (relative to env->mem_io_vaddr's
   page) against every watchpoint of the current CPU.  'len_mask' is the
   alignment mask for the access size and 'flags' the access type
   (BP_MEM_READ or BP_MEM_WRITE) to match against wp->flags.  */
3401 /* Generate a debug exception if a watchpoint has been hit. */
3402 static void check_watchpoint(int offset, int len_mask, int flags)
3404 CPUState *env = cpu_single_env;
3405 target_ulong pc, cs_base;
3406 TranslationBlock *tb;
3407 target_ulong vaddr;
3408 CPUWatchpoint *wp;
3409 int cpu_flags;
3411 if (env->watchpoint_hit) {
3412 /* We re-entered the check after replacing the TB. Now raise
3413 * the debug interrupt so that is will trigger after the
3414 * current instruction. */
3415 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3416 return;
3418 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3419 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* Match if the access address hits the watchpoint under either mask and
   the access type is one the watchpoint asks for. */
3420 if ((vaddr == (wp->vaddr & len_mask) ||
3421 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3422 wp->flags |= BP_WATCHPOINT_HIT;
3423 if (!env->watchpoint_hit) {
3424 env->watchpoint_hit = wp;
3425 tb = tb_find_pc(env->mem_io_pc);
3426 if (!tb) {
3427 cpu_abort(env, "check_watchpoint: could not find TB for "
3428 "pc=%p", (void *)env->mem_io_pc);
/* Restore CPU state for the faulting instruction and discard its TB. */
3430 cpu_restore_state(tb, env, env->mem_io_pc);
3431 tb_phys_invalidate(tb, -1);
/* Either raise EXCP_DEBUG right away, or generate a fresh TB for the
   current pc (last tb_gen_code argument is 1) before resuming. */
3432 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3433 env->exception_index = EXCP_DEBUG;
3434 } else {
3435 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3436 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3438 cpu_resume_from_signal(env, NULL);
3440 } else {
3441 wp->flags &= ~BP_WATCHPOINT_HIT;
3446 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3447 so these check for a hit then pass through to the normal out-of-line
3448 phys routines. */
3449 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3451 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3452 return ldub_phys(addr);
3455 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3457 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3458 return lduw_phys(addr);
3461 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3463 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3464 return ldl_phys(addr);
3467 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3468 uint32_t val)
3470 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3471 stb_phys(addr, val);
3474 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3475 uint32_t val)
3477 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3478 stw_phys(addr, val);
3481 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3482 uint32_t val)
3484 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3485 stl_phys(addr, val);
3488 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3489 watch_mem_readb,
3490 watch_mem_readw,
3491 watch_mem_readl,
3494 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3495 watch_mem_writeb,
3496 watch_mem_writew,
3497 watch_mem_writel,
3500 static inline uint32_t subpage_readlen (subpage_t *mmio,
3501 target_phys_addr_t addr,
3502 unsigned int len)
3504 unsigned int idx = SUBPAGE_IDX(addr);
3505 #if defined(DEBUG_SUBPAGE)
3506 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3507 mmio, len, addr, idx);
3508 #endif
3510 addr += mmio->region_offset[idx];
3511 idx = mmio->sub_io_index[idx];
3512 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3515 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3516 uint32_t value, unsigned int len)
3518 unsigned int idx = SUBPAGE_IDX(addr);
3519 #if defined(DEBUG_SUBPAGE)
3520 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3521 __func__, mmio, len, addr, idx, value);
3522 #endif
3524 addr += mmio->region_offset[idx];
3525 idx = mmio->sub_io_index[idx];
3526 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3529 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3531 return subpage_readlen(opaque, addr, 0);
3534 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3535 uint32_t value)
3537 subpage_writelen(opaque, addr, value, 0);
3540 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3542 return subpage_readlen(opaque, addr, 1);
3545 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3546 uint32_t value)
3548 subpage_writelen(opaque, addr, value, 1);
3551 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3553 return subpage_readlen(opaque, addr, 2);
3556 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3557 uint32_t value)
3559 subpage_writelen(opaque, addr, value, 2);
3562 static CPUReadMemoryFunc * const subpage_read[] = {
3563 &subpage_readb,
3564 &subpage_readw,
3565 &subpage_readl,
3568 static CPUWriteMemoryFunc * const subpage_write[] = {
3569 &subpage_writeb,
3570 &subpage_writew,
3571 &subpage_writel,
3574 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3575 ram_addr_t memory, ram_addr_t region_offset)
3577 int idx, eidx;
3579 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3580 return -1;
3581 idx = SUBPAGE_IDX(start);
3582 eidx = SUBPAGE_IDX(end);
3583 #if defined(DEBUG_SUBPAGE)
3584 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3585 mmio, start, end, idx, eidx, memory);
3586 #endif
3587 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3588 memory = IO_MEM_UNASSIGNED;
3589 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3590 for (; idx <= eidx; idx++) {
3591 mmio->sub_io_index[idx] = memory;
3592 mmio->region_offset[idx] = region_offset;
3595 return 0;
3598 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3599 ram_addr_t orig_memory,
3600 ram_addr_t region_offset)
3602 subpage_t *mmio;
3603 int subpage_memory;
3605 mmio = qemu_mallocz(sizeof(subpage_t));
3607 mmio->base = base;
3608 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3609 DEVICE_NATIVE_ENDIAN);
3610 #if defined(DEBUG_SUBPAGE)
3611 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3612 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3613 #endif
3614 *phys = subpage_memory | IO_MEM_SUBPAGE;
3615 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3617 return mmio;
3620 static int get_free_io_mem_idx(void)
3622 int i;
3624 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3625 if (!io_mem_used[i]) {
3626 io_mem_used[i] = 1;
3627 return i;
3629 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3630 return -1;
/*
 * Usually, devices operate in little endian mode. There are devices out
 * there that operate in big endian too. Each device gets byte swapped
 * mmio if plugged onto a CPU that does the other endianness.
 *
 * CPU          Device           swap?
 *
 * little       little           no
 * little       big              yes
 * big          little           yes
 * big          big              no
 */
3646 typedef struct SwapEndianContainer {
3647 CPUReadMemoryFunc *read[3];
3648 CPUWriteMemoryFunc *write[3];
3649 void *opaque;
3650 } SwapEndianContainer;
3652 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3654 uint32_t val;
3655 SwapEndianContainer *c = opaque;
3656 val = c->read[0](c->opaque, addr);
3657 return val;
3660 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3662 uint32_t val;
3663 SwapEndianContainer *c = opaque;
3664 val = bswap16(c->read[1](c->opaque, addr));
3665 return val;
3668 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3670 uint32_t val;
3671 SwapEndianContainer *c = opaque;
3672 val = bswap32(c->read[2](c->opaque, addr));
3673 return val;
3676 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3677 swapendian_mem_readb,
3678 swapendian_mem_readw,
3679 swapendian_mem_readl
3682 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3683 uint32_t val)
3685 SwapEndianContainer *c = opaque;
3686 c->write[0](c->opaque, addr, val);
3689 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3690 uint32_t val)
3692 SwapEndianContainer *c = opaque;
3693 c->write[1](c->opaque, addr, bswap16(val));
3696 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3697 uint32_t val)
3699 SwapEndianContainer *c = opaque;
3700 c->write[2](c->opaque, addr, bswap32(val));
3703 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3704 swapendian_mem_writeb,
3705 swapendian_mem_writew,
3706 swapendian_mem_writel
3709 static void swapendian_init(int io_index)
3711 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3712 int i;
3714 /* Swap mmio for big endian targets */
3715 c->opaque = io_mem_opaque[io_index];
3716 for (i = 0; i < 3; i++) {
3717 c->read[i] = io_mem_read[io_index][i];
3718 c->write[i] = io_mem_write[io_index][i];
3720 io_mem_read[io_index][i] = swapendian_readfn[i];
3721 io_mem_write[io_index][i] = swapendian_writefn[i];
3723 io_mem_opaque[io_index] = c;
3726 static void swapendian_del(int io_index)
3728 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3729 qemu_free(io_mem_opaque[io_index]);
3733 /* mem_read and mem_write are arrays of functions containing the
3734 function to access byte (index 0), word (index 1) and dword (index
3735 2). Functions can be omitted with a NULL function pointer.
3736 If io_index is non zero, the corresponding io zone is
3737 modified. If it is zero, a new io zone is allocated. The return
3738 value can be used with cpu_register_physical_memory(). (-1) is
3739 returned if error. */
3740 static int cpu_register_io_memory_fixed(int io_index,
3741 CPUReadMemoryFunc * const *mem_read,
3742 CPUWriteMemoryFunc * const *mem_write,
3743 void *opaque, enum device_endian endian)
3745 int i;
3747 if (io_index <= 0) {
3748 io_index = get_free_io_mem_idx();
3749 if (io_index == -1)
3750 return io_index;
3751 } else {
3752 io_index >>= IO_MEM_SHIFT;
3753 if (io_index >= IO_MEM_NB_ENTRIES)
3754 return -1;
3757 for (i = 0; i < 3; ++i) {
3758 io_mem_read[io_index][i]
3759 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3761 for (i = 0; i < 3; ++i) {
3762 io_mem_write[io_index][i]
3763 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3765 io_mem_opaque[io_index] = opaque;
3767 switch (endian) {
3768 case DEVICE_BIG_ENDIAN:
3769 #ifndef TARGET_WORDS_BIGENDIAN
3770 swapendian_init(io_index);
3771 #endif
3772 break;
3773 case DEVICE_LITTLE_ENDIAN:
3774 #ifdef TARGET_WORDS_BIGENDIAN
3775 swapendian_init(io_index);
3776 #endif
3777 break;
3778 case DEVICE_NATIVE_ENDIAN:
3779 default:
3780 break;
3783 return (io_index << IO_MEM_SHIFT);
3786 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3787 CPUWriteMemoryFunc * const *mem_write,
3788 void *opaque, enum device_endian endian)
3790 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3793 void cpu_unregister_io_memory(int io_table_address)
3795 int i;
3796 int io_index = io_table_address >> IO_MEM_SHIFT;
3798 swapendian_del(io_index);
3800 for (i=0;i < 3; i++) {
3801 io_mem_read[io_index][i] = unassigned_mem_read[i];
3802 io_mem_write[io_index][i] = unassigned_mem_write[i];
3804 io_mem_opaque[io_index] = NULL;
3805 io_mem_used[io_index] = 0;
3808 static void io_mem_init(void)
3810 int i;
3812 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3813 unassigned_mem_write, NULL,
3814 DEVICE_NATIVE_ENDIAN);
3815 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3816 unassigned_mem_write, NULL,
3817 DEVICE_NATIVE_ENDIAN);
3818 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3819 notdirty_mem_write, NULL,
3820 DEVICE_NATIVE_ENDIAN);
3821 for (i=0; i<5; i++)
3822 io_mem_used[i] = 1;
3824 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3825 watch_mem_write, NULL,
3826 DEVICE_NATIVE_ENDIAN);
3829 static void memory_map_init(void)
3831 system_memory = qemu_malloc(sizeof(*system_memory));
3832 memory_region_init(system_memory, "system", INT64_MAX);
3833 set_system_memory_map(system_memory);
3835 system_io = qemu_malloc(sizeof(*system_io));
3836 memory_region_init(system_io, "io", 65536);
3837 set_system_io_map(system_io);
3840 MemoryRegion *get_system_memory(void)
3842 return system_memory;
3845 MemoryRegion *get_system_io(void)
3847 return system_io;
3850 #endif /* !defined(CONFIG_USER_ONLY) */
3852 /* physical memory access (slow version, mainly for debug) */
3853 #if defined(CONFIG_USER_ONLY)
3854 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3855 uint8_t *buf, int len, int is_write)
3857 int l, flags;
3858 target_ulong page;
3859 void * p;
3861 while (len > 0) {
3862 page = addr & TARGET_PAGE_MASK;
3863 l = (page + TARGET_PAGE_SIZE) - addr;
3864 if (l > len)
3865 l = len;
3866 flags = page_get_flags(page);
3867 if (!(flags & PAGE_VALID))
3868 return -1;
3869 if (is_write) {
3870 if (!(flags & PAGE_WRITE))
3871 return -1;
3872 /* XXX: this code should not depend on lock_user */
3873 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3874 return -1;
3875 memcpy(p, buf, l);
3876 unlock_user(p, addr, l);
3877 } else {
3878 if (!(flags & PAGE_READ))
3879 return -1;
3880 /* XXX: this code should not depend on lock_user */
3881 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3882 return -1;
3883 memcpy(buf, p, l);
3884 unlock_user(p, addr, 0);
3886 len -= l;
3887 buf += l;
3888 addr += l;
3890 return 0;
3893 #else
3894 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3895 int len, int is_write)
3897 int l, io_index;
3898 uint8_t *ptr;
3899 uint32_t val;
3900 target_phys_addr_t page;
3901 ram_addr_t pd;
3902 PhysPageDesc *p;
3904 while (len > 0) {
3905 page = addr & TARGET_PAGE_MASK;
3906 l = (page + TARGET_PAGE_SIZE) - addr;
3907 if (l > len)
3908 l = len;
3909 p = phys_page_find(page >> TARGET_PAGE_BITS);
3910 if (!p) {
3911 pd = IO_MEM_UNASSIGNED;
3912 } else {
3913 pd = p->phys_offset;
3916 if (is_write) {
3917 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3918 target_phys_addr_t addr1 = addr;
3919 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3920 if (p)
3921 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3922 /* XXX: could force cpu_single_env to NULL to avoid
3923 potential bugs */
3924 if (l >= 4 && ((addr1 & 3) == 0)) {
3925 /* 32 bit write access */
3926 val = ldl_p(buf);
3927 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3928 l = 4;
3929 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3930 /* 16 bit write access */
3931 val = lduw_p(buf);
3932 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3933 l = 2;
3934 } else {
3935 /* 8 bit write access */
3936 val = ldub_p(buf);
3937 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3938 l = 1;
3940 } else {
3941 ram_addr_t addr1;
3942 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3943 /* RAM case */
3944 ptr = qemu_get_ram_ptr(addr1);
3945 memcpy(ptr, buf, l);
3946 if (!cpu_physical_memory_is_dirty(addr1)) {
3947 /* invalidate code */
3948 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3949 /* set dirty bit */
3950 cpu_physical_memory_set_dirty_flags(
3951 addr1, (0xff & ~CODE_DIRTY_FLAG));
3953 qemu_put_ram_ptr(ptr);
3955 } else {
3956 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3957 !(pd & IO_MEM_ROMD)) {
3958 target_phys_addr_t addr1 = addr;
3959 /* I/O case */
3960 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3961 if (p)
3962 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3963 if (l >= 4 && ((addr1 & 3) == 0)) {
3964 /* 32 bit read access */
3965 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3966 stl_p(buf, val);
3967 l = 4;
3968 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3969 /* 16 bit read access */
3970 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3971 stw_p(buf, val);
3972 l = 2;
3973 } else {
3974 /* 8 bit read access */
3975 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3976 stb_p(buf, val);
3977 l = 1;
3979 } else {
3980 /* RAM case */
3981 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3982 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3983 qemu_put_ram_ptr(ptr);
3986 len -= l;
3987 buf += l;
3988 addr += l;
3992 /* used for ROM loading : can write in RAM and ROM */
3993 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3994 const uint8_t *buf, int len)
3996 int l;
3997 uint8_t *ptr;
3998 target_phys_addr_t page;
3999 unsigned long pd;
4000 PhysPageDesc *p;
4002 while (len > 0) {
4003 page = addr & TARGET_PAGE_MASK;
4004 l = (page + TARGET_PAGE_SIZE) - addr;
4005 if (l > len)
4006 l = len;
4007 p = phys_page_find(page >> TARGET_PAGE_BITS);
4008 if (!p) {
4009 pd = IO_MEM_UNASSIGNED;
4010 } else {
4011 pd = p->phys_offset;
4014 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4015 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4016 !(pd & IO_MEM_ROMD)) {
4017 /* do nothing */
4018 } else {
4019 unsigned long addr1;
4020 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4021 /* ROM/RAM case */
4022 ptr = qemu_get_ram_ptr(addr1);
4023 memcpy(ptr, buf, l);
4024 qemu_put_ram_ptr(ptr);
4026 len -= l;
4027 buf += l;
4028 addr += l;
4032 typedef struct {
4033 void *buffer;
4034 target_phys_addr_t addr;
4035 target_phys_addr_t len;
4036 } BounceBuffer;
4038 static BounceBuffer bounce;
4040 typedef struct MapClient {
4041 void *opaque;
4042 void (*callback)(void *opaque);
4043 QLIST_ENTRY(MapClient) link;
4044 } MapClient;
4046 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4047 = QLIST_HEAD_INITIALIZER(map_client_list);
4049 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4051 MapClient *client = qemu_malloc(sizeof(*client));
4053 client->opaque = opaque;
4054 client->callback = callback;
4055 QLIST_INSERT_HEAD(&map_client_list, client, link);
4056 return client;
4059 void cpu_unregister_map_client(void *_client)
4061 MapClient *client = (MapClient *)_client;
4063 QLIST_REMOVE(client, link);
4064 qemu_free(client);
4067 static void cpu_notify_map_clients(void)
4069 MapClient *client;
4071 while (!QLIST_EMPTY(&map_client_list)) {
4072 client = QLIST_FIRST(&map_client_list);
4073 client->callback(client->opaque);
4074 cpu_unregister_map_client(client);
4078 /* Map a physical memory region into a host virtual address.
4079 * May map a subset of the requested range, given by and returned in *plen.
4080 * May return NULL if resources needed to perform the mapping are exhausted.
4081 * Use only for reads OR writes - not for read-modify-write operations.
4082 * Use cpu_register_map_client() to know when retrying the map operation is
4083 * likely to succeed.
4085 void *cpu_physical_memory_map(target_phys_addr_t addr,
4086 target_phys_addr_t *plen,
4087 int is_write)
4089 target_phys_addr_t len = *plen;
4090 target_phys_addr_t todo = 0;
4091 int l;
4092 target_phys_addr_t page;
4093 unsigned long pd;
4094 PhysPageDesc *p;
4095 ram_addr_t raddr = RAM_ADDR_MAX;
4096 ram_addr_t rlen;
4097 void *ret;
4099 while (len > 0) {
4100 page = addr & TARGET_PAGE_MASK;
4101 l = (page + TARGET_PAGE_SIZE) - addr;
4102 if (l > len)
4103 l = len;
4104 p = phys_page_find(page >> TARGET_PAGE_BITS);
4105 if (!p) {
4106 pd = IO_MEM_UNASSIGNED;
4107 } else {
4108 pd = p->phys_offset;
4111 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4112 if (todo || bounce.buffer) {
4113 break;
4115 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4116 bounce.addr = addr;
4117 bounce.len = l;
4118 if (!is_write) {
4119 cpu_physical_memory_read(addr, bounce.buffer, l);
4122 *plen = l;
4123 return bounce.buffer;
4125 if (!todo) {
4126 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4129 len -= l;
4130 addr += l;
4131 todo += l;
4133 rlen = todo;
4134 ret = qemu_ram_ptr_length(raddr, &rlen);
4135 *plen = rlen;
4136 return ret;
4139 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4140 * Will also mark the memory as dirty if is_write == 1. access_len gives
4141 * the amount of memory that was actually read or written by the caller.
4143 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4144 int is_write, target_phys_addr_t access_len)
4146 if (buffer != bounce.buffer) {
4147 if (is_write) {
4148 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4149 while (access_len) {
4150 unsigned l;
4151 l = TARGET_PAGE_SIZE;
4152 if (l > access_len)
4153 l = access_len;
4154 if (!cpu_physical_memory_is_dirty(addr1)) {
4155 /* invalidate code */
4156 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4157 /* set dirty bit */
4158 cpu_physical_memory_set_dirty_flags(
4159 addr1, (0xff & ~CODE_DIRTY_FLAG));
4161 addr1 += l;
4162 access_len -= l;
4165 if (xen_enabled()) {
4166 xen_invalidate_map_cache_entry(buffer);
4168 return;
4170 if (is_write) {
4171 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4173 qemu_vfree(bounce.buffer);
4174 bounce.buffer = NULL;
4175 cpu_notify_map_clients();
4178 /* warning: addr must be aligned */
4179 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4180 enum device_endian endian)
4182 int io_index;
4183 uint8_t *ptr;
4184 uint32_t val;
4185 unsigned long pd;
4186 PhysPageDesc *p;
4188 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4189 if (!p) {
4190 pd = IO_MEM_UNASSIGNED;
4191 } else {
4192 pd = p->phys_offset;
4195 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4196 !(pd & IO_MEM_ROMD)) {
4197 /* I/O case */
4198 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4199 if (p)
4200 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4201 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4202 #if defined(TARGET_WORDS_BIGENDIAN)
4203 if (endian == DEVICE_LITTLE_ENDIAN) {
4204 val = bswap32(val);
4206 #else
4207 if (endian == DEVICE_BIG_ENDIAN) {
4208 val = bswap32(val);
4210 #endif
4211 } else {
4212 /* RAM case */
4213 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4214 (addr & ~TARGET_PAGE_MASK);
4215 switch (endian) {
4216 case DEVICE_LITTLE_ENDIAN:
4217 val = ldl_le_p(ptr);
4218 break;
4219 case DEVICE_BIG_ENDIAN:
4220 val = ldl_be_p(ptr);
4221 break;
4222 default:
4223 val = ldl_p(ptr);
4224 break;
4227 return val;
4230 uint32_t ldl_phys(target_phys_addr_t addr)
4232 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4235 uint32_t ldl_le_phys(target_phys_addr_t addr)
4237 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4240 uint32_t ldl_be_phys(target_phys_addr_t addr)
4242 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4245 /* warning: addr must be aligned */
4246 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4247 enum device_endian endian)
4249 int io_index;
4250 uint8_t *ptr;
4251 uint64_t val;
4252 unsigned long pd;
4253 PhysPageDesc *p;
4255 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4256 if (!p) {
4257 pd = IO_MEM_UNASSIGNED;
4258 } else {
4259 pd = p->phys_offset;
4262 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4263 !(pd & IO_MEM_ROMD)) {
4264 /* I/O case */
4265 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4266 if (p)
4267 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4269 /* XXX This is broken when device endian != cpu endian.
4270 Fix and add "endian" variable check */
4271 #ifdef TARGET_WORDS_BIGENDIAN
4272 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4273 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4274 #else
4275 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4276 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4277 #endif
4278 } else {
4279 /* RAM case */
4280 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4281 (addr & ~TARGET_PAGE_MASK);
4282 switch (endian) {
4283 case DEVICE_LITTLE_ENDIAN:
4284 val = ldq_le_p(ptr);
4285 break;
4286 case DEVICE_BIG_ENDIAN:
4287 val = ldq_be_p(ptr);
4288 break;
4289 default:
4290 val = ldq_p(ptr);
4291 break;
4294 return val;
4297 uint64_t ldq_phys(target_phys_addr_t addr)
4299 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4302 uint64_t ldq_le_phys(target_phys_addr_t addr)
4304 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4307 uint64_t ldq_be_phys(target_phys_addr_t addr)
4309 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4312 /* XXX: optimize */
4313 uint32_t ldub_phys(target_phys_addr_t addr)
4315 uint8_t val;
4316 cpu_physical_memory_read(addr, &val, 1);
4317 return val;
4320 /* warning: addr must be aligned */
4321 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4322 enum device_endian endian)
4324 int io_index;
4325 uint8_t *ptr;
4326 uint64_t val;
4327 unsigned long pd;
4328 PhysPageDesc *p;
4330 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4331 if (!p) {
4332 pd = IO_MEM_UNASSIGNED;
4333 } else {
4334 pd = p->phys_offset;
4337 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4338 !(pd & IO_MEM_ROMD)) {
4339 /* I/O case */
4340 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4341 if (p)
4342 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4343 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4344 #if defined(TARGET_WORDS_BIGENDIAN)
4345 if (endian == DEVICE_LITTLE_ENDIAN) {
4346 val = bswap16(val);
4348 #else
4349 if (endian == DEVICE_BIG_ENDIAN) {
4350 val = bswap16(val);
4352 #endif
4353 } else {
4354 /* RAM case */
4355 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4356 (addr & ~TARGET_PAGE_MASK);
4357 switch (endian) {
4358 case DEVICE_LITTLE_ENDIAN:
4359 val = lduw_le_p(ptr);
4360 break;
4361 case DEVICE_BIG_ENDIAN:
4362 val = lduw_be_p(ptr);
4363 break;
4364 default:
4365 val = lduw_p(ptr);
4366 break;
4369 return val;
4372 uint32_t lduw_phys(target_phys_addr_t addr)
4374 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4377 uint32_t lduw_le_phys(target_phys_addr_t addr)
4379 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4382 uint32_t lduw_be_phys(target_phys_addr_t addr)
4384 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4387 /* warning: addr must be aligned. The ram page is not masked as dirty
4388 and the code inside is not invalidated. It is useful if the dirty
4389 bits are used to track modified PTEs */
4390 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4392 int io_index;
4393 uint8_t *ptr;
4394 unsigned long pd;
4395 PhysPageDesc *p;
4397 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4398 if (!p) {
4399 pd = IO_MEM_UNASSIGNED;
4400 } else {
4401 pd = p->phys_offset;
4404 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4405 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4406 if (p)
4407 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4408 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4409 } else {
4410 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4411 ptr = qemu_get_ram_ptr(addr1);
4412 stl_p(ptr, val);
4414 if (unlikely(in_migration)) {
4415 if (!cpu_physical_memory_is_dirty(addr1)) {
4416 /* invalidate code */
4417 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4418 /* set dirty bit */
4419 cpu_physical_memory_set_dirty_flags(
4420 addr1, (0xff & ~CODE_DIRTY_FLAG));
4426 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4428 int io_index;
4429 uint8_t *ptr;
4430 unsigned long pd;
4431 PhysPageDesc *p;
4433 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4434 if (!p) {
4435 pd = IO_MEM_UNASSIGNED;
4436 } else {
4437 pd = p->phys_offset;
4440 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4441 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4442 if (p)
4443 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4444 #ifdef TARGET_WORDS_BIGENDIAN
4445 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4446 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4447 #else
4448 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4449 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4450 #endif
4451 } else {
4452 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4453 (addr & ~TARGET_PAGE_MASK);
4454 stq_p(ptr, val);
4458 /* warning: addr must be aligned */
4459 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4460 enum device_endian endian)
4462 int io_index;
4463 uint8_t *ptr;
4464 unsigned long pd;
4465 PhysPageDesc *p;
4467 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4468 if (!p) {
4469 pd = IO_MEM_UNASSIGNED;
4470 } else {
4471 pd = p->phys_offset;
4474 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4475 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4476 if (p)
4477 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4478 #if defined(TARGET_WORDS_BIGENDIAN)
4479 if (endian == DEVICE_LITTLE_ENDIAN) {
4480 val = bswap32(val);
4482 #else
4483 if (endian == DEVICE_BIG_ENDIAN) {
4484 val = bswap32(val);
4486 #endif
4487 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4488 } else {
4489 unsigned long addr1;
4490 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4491 /* RAM case */
4492 ptr = qemu_get_ram_ptr(addr1);
4493 switch (endian) {
4494 case DEVICE_LITTLE_ENDIAN:
4495 stl_le_p(ptr, val);
4496 break;
4497 case DEVICE_BIG_ENDIAN:
4498 stl_be_p(ptr, val);
4499 break;
4500 default:
4501 stl_p(ptr, val);
4502 break;
4504 if (!cpu_physical_memory_is_dirty(addr1)) {
4505 /* invalidate code */
4506 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4507 /* set dirty bit */
4508 cpu_physical_memory_set_dirty_flags(addr1,
4509 (0xff & ~CODE_DIRTY_FLAG));
4514 void stl_phys(target_phys_addr_t addr, uint32_t val)
4516 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4519 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4521 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4524 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4526 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4529 /* XXX: optimize */
4530 void stb_phys(target_phys_addr_t addr, uint32_t val)
4532 uint8_t v = val;
4533 cpu_physical_memory_write(addr, &v, 1);
4536 /* warning: addr must be aligned */
4537 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4538 enum device_endian endian)
4540 int io_index;
4541 uint8_t *ptr;
4542 unsigned long pd;
4543 PhysPageDesc *p;
4545 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4546 if (!p) {
4547 pd = IO_MEM_UNASSIGNED;
4548 } else {
4549 pd = p->phys_offset;
4552 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4553 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4554 if (p)
4555 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4556 #if defined(TARGET_WORDS_BIGENDIAN)
4557 if (endian == DEVICE_LITTLE_ENDIAN) {
4558 val = bswap16(val);
4560 #else
4561 if (endian == DEVICE_BIG_ENDIAN) {
4562 val = bswap16(val);
4564 #endif
4565 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4566 } else {
4567 unsigned long addr1;
4568 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4569 /* RAM case */
4570 ptr = qemu_get_ram_ptr(addr1);
4571 switch (endian) {
4572 case DEVICE_LITTLE_ENDIAN:
4573 stw_le_p(ptr, val);
4574 break;
4575 case DEVICE_BIG_ENDIAN:
4576 stw_be_p(ptr, val);
4577 break;
4578 default:
4579 stw_p(ptr, val);
4580 break;
4582 if (!cpu_physical_memory_is_dirty(addr1)) {
4583 /* invalidate code */
4584 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4585 /* set dirty bit */
4586 cpu_physical_memory_set_dirty_flags(addr1,
4587 (0xff & ~CODE_DIRTY_FLAG));
4592 void stw_phys(target_phys_addr_t addr, uint32_t val)
4594 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4597 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4599 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4602 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4604 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4607 /* XXX: optimize */
4608 void stq_phys(target_phys_addr_t addr, uint64_t val)
4610 val = tswap64(val);
4611 cpu_physical_memory_write(addr, &val, 8);
4614 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4616 val = cpu_to_le64(val);
4617 cpu_physical_memory_write(addr, &val, 8);
4620 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4622 val = cpu_to_be64(val);
4623 cpu_physical_memory_write(addr, &val, 8);
4626 /* virtual memory access for debug (includes writing to ROM) */
4627 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4628 uint8_t *buf, int len, int is_write)
4630 int l;
4631 target_phys_addr_t phys_addr;
4632 target_ulong page;
4634 while (len > 0) {
4635 page = addr & TARGET_PAGE_MASK;
4636 phys_addr = cpu_get_phys_page_debug(env, page);
4637 /* if no physical page mapped, return an error */
4638 if (phys_addr == -1)
4639 return -1;
4640 l = (page + TARGET_PAGE_SIZE) - addr;
4641 if (l > len)
4642 l = len;
4643 phys_addr += (addr & ~TARGET_PAGE_MASK);
4644 if (is_write)
4645 cpu_physical_memory_write_rom(phys_addr, buf, l);
4646 else
4647 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4648 len -= l;
4649 buf += l;
4650 addr += l;
4652 return 0;
4654 #endif
4656 /* in deterministic execution mode, instructions doing device I/Os
4657 must be at the end of the TB */
4658 void cpu_io_recompile(CPUState *env, void *retaddr)
4660 TranslationBlock *tb;
4661 uint32_t n, cflags;
4662 target_ulong pc, cs_base;
4663 uint64_t flags;
4665 tb = tb_find_pc((unsigned long)retaddr);
4666 if (!tb) {
4667 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4668 retaddr);
4670 n = env->icount_decr.u16.low + tb->icount;
4671 cpu_restore_state(tb, env, (unsigned long)retaddr);
4672 /* Calculate how many instructions had been executed before the fault
4673 occurred. */
4674 n = n - env->icount_decr.u16.low;
4675 /* Generate a new TB ending on the I/O insn. */
4676 n++;
4677 /* On MIPS and SH, delay slot instructions can only be restarted if
4678 they were already the first instruction in the TB. If this is not
4679 the first instruction in a TB then re-execute the preceding
4680 branch. */
4681 #if defined(TARGET_MIPS)
4682 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4683 env->active_tc.PC -= 4;
4684 env->icount_decr.u16.low++;
4685 env->hflags &= ~MIPS_HFLAG_BMASK;
4687 #elif defined(TARGET_SH4)
4688 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4689 && n > 1) {
4690 env->pc -= 2;
4691 env->icount_decr.u16.low++;
4692 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4694 #endif
4695 /* This should never happen. */
4696 if (n > CF_COUNT_MASK)
4697 cpu_abort(env, "TB too big during recompile");
4699 cflags = n | CF_LAST_IO;
4700 pc = tb->pc;
4701 cs_base = tb->cs_base;
4702 flags = tb->flags;
4703 tb_phys_invalidate(tb, -1);
4704 /* FIXME: In theory this could raise an exception. In practice
4705 we have already translated the block once so it's probably ok. */
4706 tb_gen_code(env, pc, cs_base, flags, cflags);
4707 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4708 the first in the TB) then we end up generating a whole new TB and
4709 repeating the fault, which is horribly inefficient.
4710 Better would be to execute just this insn uncached, or generate a
4711 second new TB. */
4712 cpu_resume_from_signal(env, NULL);
4715 #if !defined(CONFIG_USER_ONLY)
4717 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4719 int i, target_code_size, max_target_code_size;
4720 int direct_jmp_count, direct_jmp2_count, cross_page;
4721 TranslationBlock *tb;
4723 target_code_size = 0;
4724 max_target_code_size = 0;
4725 cross_page = 0;
4726 direct_jmp_count = 0;
4727 direct_jmp2_count = 0;
4728 for(i = 0; i < nb_tbs; i++) {
4729 tb = &tbs[i];
4730 target_code_size += tb->size;
4731 if (tb->size > max_target_code_size)
4732 max_target_code_size = tb->size;
4733 if (tb->page_addr[1] != -1)
4734 cross_page++;
4735 if (tb->tb_next_offset[0] != 0xffff) {
4736 direct_jmp_count++;
4737 if (tb->tb_next_offset[1] != 0xffff) {
4738 direct_jmp2_count++;
4742 /* XXX: avoid using doubles ? */
4743 cpu_fprintf(f, "Translation buffer state:\n");
4744 cpu_fprintf(f, "gen code size %td/%ld\n",
4745 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4746 cpu_fprintf(f, "TB count %d/%d\n",
4747 nb_tbs, code_gen_max_blocks);
4748 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4749 nb_tbs ? target_code_size / nb_tbs : 0,
4750 max_target_code_size);
4751 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4752 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4753 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4754 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4755 cross_page,
4756 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4757 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4758 direct_jmp_count,
4759 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4760 direct_jmp2_count,
4761 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4762 cpu_fprintf(f, "\nStatistics:\n");
4763 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4764 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4765 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4766 tcg_dump_info(f, cpu_fprintf);
/* Configure and expand softmmu_template.h four times, once per SHIFT value.
   The macros below are the knobs this file sets before each inclusion;
   what the template generates from them is defined in softmmu_template.h
   itself and is not visible here. */
/* Suffix passed to the template (presumably pasted onto the names it
   generates -- confirm in softmmu_template.h). */
4769 #define MMUSUFFIX _cmmu
/* For these expansions GETPC() yields NULL. */
4770 #define GETPC() NULL
/* Inside the template, "env" resolves to the global cpu_single_env;
   the alias is removed again by the #undef at the end of this section. */
4771 #define env cpu_single_env
/* Requests the code-access variant of the template
   (NOTE(review): exact effect lives in the template headers). */
4772 #define SOFTMMU_CODE_ACCESS
/* SHIFT is defined afresh before every include with no #undef in between
   here, so the template presumably #undefs it itself -- TODO confirm.
   SHIFT likely selects the access size (1 << SHIFT bytes) -- verify. */
4774 #define SHIFT 0
4775 #include "softmmu_template.h"
4777 #define SHIFT 1
4778 #include "softmmu_template.h"
4780 #define SHIFT 2
4781 #include "softmmu_template.h"
4783 #define SHIFT 3
4784 #include "softmmu_template.h"
/* Drop the env alias so it cannot leak past this section. */
4786 #undef env
4788 #endif