Move daemonize handling to OS specific files
[qemu.git] / exec.c
blob427379740a6b9c7dade11198650939bc02150f41
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <unistd.h>
32 #include <inttypes.h>
34 #include "cpu.h"
35 #include "exec-all.h"
36 #include "qemu-common.h"
37 #include "tcg.h"
38 #include "hw/hw.h"
39 #include "osdep.h"
40 #include "kvm.h"
41 #include "qemu-timer.h"
42 #if defined(CONFIG_USER_ONLY)
43 #include <qemu.h>
44 #include <signal.h>
45 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
46 #include <sys/param.h>
47 #if __FreeBSD_version >= 700104
48 #define HAVE_KINFO_GETVMMAP
49 #define sigqueue sigqueue_freebsd /* avoid redefinition */
50 #include <sys/time.h>
51 #include <sys/proc.h>
52 #include <machine/profile.h>
53 #define _KERNEL
54 #include <sys/user.h>
55 #undef _KERNEL
56 #undef sigqueue
57 #include <libutil.h>
58 #endif
59 #endif
60 #endif
62 //#define DEBUG_TB_INVALIDATE
63 //#define DEBUG_FLUSH
64 //#define DEBUG_TLB
65 //#define DEBUG_UNASSIGNED
67 /* make various TB consistency checks */
68 //#define DEBUG_TB_CHECK
69 //#define DEBUG_TLB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 #if defined(__arm__) || defined(__sparc_v9__)
89 /* The prologue must be reachable with a direct jump. ARM and Sparc64
90 have limited branch ranges (possibly also PPC) so place it in a
91 section close to code segment. */
92 #define code_gen_section \
93 __attribute__((__section__(".gen_code"))) \
94 __attribute__((aligned (32)))
95 #elif defined(_WIN32)
96 /* Maximum alignment for Win32 is 16. */
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 uint8_t *code_gen_ptr;
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 uint8_t *phys_ram_dirty;
114 static int in_migration;
/* Node of the singly linked list headed by 'ram_blocks'.  Each node
   carries a host pointer plus an (offset, length) pair expressed as
   ram_addr_t; 'next' chains to the following node.
   NOTE(review): presumably 'host' is the host mapping backing the
   ram_addr_t range [offset, offset + length) -- confirm against the
   allocation code, which is not visible here. */
116 typedef struct RAMBlock {
117 uint8_t *host;
118 ram_addr_t offset;
119 ram_addr_t length;
120 struct RAMBlock *next;
121 } RAMBlock;
123 static RAMBlock *ram_blocks;
124 /* TODO: When we implement (and use) ram deallocation (e.g. for hotplug)
125 then we can no longer assume contiguous ram offsets, and external uses
126 of this variable will break. */
127 ram_addr_t last_ram_offset;
128 #endif
130 CPUState *first_cpu;
131 /* current CPU in the current thread. It is only valid inside
132 cpu_exec() */
133 CPUState *cpu_single_env;
134 /* 0 = Do not count executed instructions.
135 1 = Precise instruction counting.
136 2 = Adaptive rate instruction counting. */
137 int use_icount = 0;
138 /* Current instruction counter. While executing translated code this may
139 include some instructions that have not yet been executed. */
140 int64_t qemu_icount;
/* Per-page bookkeeping stored in the bottom level of l1_map. */
142 typedef struct PageDesc {
143 /* list of TBs intersecting this ram page; the low two bits of each
   link are a tag selecting which page_next[] slot continues the list */
144 TranslationBlock *first_tb;
145 /* in order to optimize self modifying code, we count the number
146 of write-driven invalidations on a given page; once the count reaches
   SMC_BITMAP_USE_THRESHOLD a code bitmap is built for the page */
147 unsigned int code_write_count;
/* one bit per byte of the page (TARGET_PAGE_SIZE / 8 bytes), set where
   translated code lives; NULL while no bitmap has been built */
148 uint8_t *code_bitmap;
149 #if defined(CONFIG_USER_ONLY)
/* PAGE_* flag bits for this page (user-mode emulation only) */
150 unsigned long flags;
151 #endif
152 } PageDesc;
154 /* In system mode we want L1_MAP to be based on ram offsets,
155 while in user mode we want it to be based on virtual addresses. */
156 #if !defined(CONFIG_USER_ONLY)
157 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
158 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
159 #else
160 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
161 #endif
162 #else
163 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
164 #endif
166 /* Size of the L2 (and L3, etc) page tables. */
167 #define L2_BITS 10
168 #define L2_SIZE (1 << L2_BITS)
170 /* The bits remaining after N lower levels of page tables. */
171 #define P_L1_BITS_REM \
172 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
173 #define V_L1_BITS_REM \
174 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
176 /* Size of the L1 page table. Avoid silly small sizes. */
177 #if P_L1_BITS_REM < 4
178 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
179 #else
180 #define P_L1_BITS P_L1_BITS_REM
181 #endif
183 #if V_L1_BITS_REM < 4
184 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
185 #else
186 #define V_L1_BITS V_L1_BITS_REM
187 #endif
189 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
190 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
192 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
193 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
195 unsigned long qemu_real_host_page_size;
196 unsigned long qemu_host_page_bits;
197 unsigned long qemu_host_page_size;
198 unsigned long qemu_host_page_mask;
200 /* This is a multi-level map on the virtual address space.
201 The bottom level has pointers to PageDesc. */
202 static void *l1_map[V_L1_SIZE];
204 #if !defined(CONFIG_USER_ONLY)
/* Per-page entry stored in the bottom level of l1_phys_map. */
205 typedef struct PhysPageDesc {
206 /* offset in host memory of the page + io_index in the low bits */
207 ram_addr_t phys_offset;
/* initialised by phys_page_find_alloc() from the page number shifted
   left by TARGET_PAGE_BITS.
   NOTE(review): how later code consumes this is not visible here. */
208 ram_addr_t region_offset;
209 } PhysPageDesc;
211 /* This is a multi-level map on the physical address space.
212 The bottom level has pointers to PhysPageDesc. */
213 static void *l1_phys_map[P_L1_SIZE];
215 static void io_mem_init(void);
217 /* io memory support */
218 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
219 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
220 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
221 static char io_mem_used[IO_MEM_NB_ENTRIES];
222 static int io_mem_watch;
223 #endif
225 /* log support */
226 #ifdef WIN32
227 static const char *logfilename = "qemu.log";
228 #else
229 static const char *logfilename = "/tmp/qemu.log";
230 #endif
231 FILE *logfile;
232 int loglevel;
233 static int log_append = 0;
235 /* statistics */
236 #if !defined(CONFIG_USER_ONLY)
237 static int tlb_flush_count;
238 #endif
239 static int tb_flush_count;
240 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Make the host range [addr, addr + size) readable, writable and
   executable.  The previous protection is received but not used. */
static void map_exec(void *addr, long size)
{
    DWORD prev_protect;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &prev_protect);
}
#else
/* Make every host page that overlaps [addr, addr + size) readable,
   writable and executable.  The range is first widened outwards to
   host page boundaries. */
static void map_exec(void *addr, long size)
{
    const unsigned long page_size = getpagesize();
    const unsigned long page_mask = ~(page_size - 1);
    const unsigned long first = (unsigned long)addr & page_mask;
    const unsigned long last =
        ((unsigned long)addr + size + page_size - 1) & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
/* Establish the host page geometry globals (qemu_real_host_page_size,
   qemu_host_page_size, qemu_host_page_bits, qemu_host_page_mask) and,
   for BSD user-mode builds, flag the address ranges the host process
   already has mapped as PAGE_RESERVED. */
268 static void page_init(void)
270 /* NOTE: we can always suppose that qemu_host_page_size >=
271 TARGET_PAGE_SIZE */
272 #ifdef _WIN32
274 SYSTEM_INFO system_info;
276 GetSystemInfo(&system_info);
277 qemu_real_host_page_size = system_info.dwPageSize;
279 #else
280 qemu_real_host_page_size = getpagesize();
281 #endif
/* a zero qemu_host_page_size means "not set yet": default it to the
   real host page size */
282 if (qemu_host_page_size == 0)
283 qemu_host_page_size = qemu_real_host_page_size;
/* never go below the target page size */
284 if (qemu_host_page_size < TARGET_PAGE_SIZE)
285 qemu_host_page_size = TARGET_PAGE_SIZE;
/* smallest shift count whose power of two is >= qemu_host_page_size */
286 qemu_host_page_bits = 0;
287 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
288 qemu_host_page_bits++;
289 qemu_host_page_mask = ~(qemu_host_page_size - 1);
291 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
293 #ifdef HAVE_KINFO_GETVMMAP
/* walk the entries returned by kinfo_getvmmap() for this process */
294 struct kinfo_vmentry *freep;
295 int i, cnt;
297 freep = kinfo_getvmmap(getpid(), &cnt);
298 if (freep) {
299 mmap_lock();
300 for (i = 0; i < cnt; i++) {
301 unsigned long startaddr, endaddr;
303 startaddr = freep[i].kve_start;
304 endaddr = freep[i].kve_end;
/* only entries whose start passes h2g_valid() are recorded */
305 if (h2g_valid(startaddr)) {
306 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
308 if (h2g_valid(endaddr)) {
309 endaddr = h2g(endaddr);
310 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
311 } else {
/* end fails h2g_valid(): reserve up to ~0ul, but only when the
   preprocessor condition below holds */
312 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
313 endaddr = ~0ul;
314 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
315 #endif
319 free(freep);
320 mmap_unlock();
322 #else
/* fallback: parse "start-end ..." lines from the Linux-compat maps file */
323 FILE *f;
325 last_brk = (unsigned long)sbrk(0);
327 f = fopen("/compat/linux/proc/self/maps", "r");
328 if (f) {
329 mmap_lock();
331 do {
332 unsigned long startaddr, endaddr;
333 int n;
/* read two hex addresses and discard the remainder of the line */
335 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
337 if (n == 2 && h2g_valid(startaddr)) {
338 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
340 if (h2g_valid(endaddr)) {
341 endaddr = h2g(endaddr);
342 } else {
343 endaddr = ~0ul;
345 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
347 } while (!feof(f));
349 fclose(f);
350 mmap_unlock();
352 #endif
354 #endif
357 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
359 PageDesc *pd;
360 void **lp;
361 int i;
363 #if defined(CONFIG_USER_ONLY)
364 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
365 # define ALLOC(P, SIZE) \
366 do { \
367 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
368 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
369 } while (0)
370 #else
371 # define ALLOC(P, SIZE) \
372 do { P = qemu_mallocz(SIZE); } while (0)
373 #endif
375 /* Level 1. Always allocated. */
376 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
378 /* Level 2..N-1. */
379 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
380 void **p = *lp;
382 if (p == NULL) {
383 if (!alloc) {
384 return NULL;
386 ALLOC(p, sizeof(void *) * L2_SIZE);
387 *lp = p;
390 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
393 pd = *lp;
394 if (pd == NULL) {
395 if (!alloc) {
396 return NULL;
398 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
399 *lp = pd;
402 #undef ALLOC
404 return pd + (index & (L2_SIZE - 1));
407 static inline PageDesc *page_find(tb_page_addr_t index)
409 return page_find_alloc(index, 0);
412 #if !defined(CONFIG_USER_ONLY)
413 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
415 PhysPageDesc *pd;
416 void **lp;
417 int i;
419 /* Level 1. Always allocated. */
420 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
422 /* Level 2..N-1. */
423 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
424 void **p = *lp;
425 if (p == NULL) {
426 if (!alloc) {
427 return NULL;
429 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
431 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
434 pd = *lp;
435 if (pd == NULL) {
436 int i;
438 if (!alloc) {
439 return NULL;
442 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
444 for (i = 0; i < L2_SIZE; i++) {
445 pd[i].phys_offset = IO_MEM_UNASSIGNED;
446 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
450 return pd + (index & (L2_SIZE - 1));
453 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
455 return phys_page_find_alloc(index, 0);
458 static void tlb_protect_code(ram_addr_t ram_addr);
459 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
460 target_ulong vaddr);
461 #define mmap_lock() do { } while(0)
462 #define mmap_unlock() do { } while(0)
463 #endif
465 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
467 #if defined(CONFIG_USER_ONLY)
468 /* Currently it is not recommended to allocate big chunks of data in
469 user mode. It will change when a dedicated libc will be used */
470 #define USE_STATIC_CODE_GEN_BUFFER
471 #endif
473 #ifdef USE_STATIC_CODE_GEN_BUFFER
474 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
475 __attribute__((aligned (CODE_GEN_ALIGN)));
476 #endif
/* Set up the translated-code buffer and the TB descriptor array.
   'tb_size' is the requested buffer size in bytes; zero selects a
   default.  On return code_gen_buffer, code_gen_buffer_size,
   code_gen_buffer_max_size, code_gen_max_blocks and tbs are all set,
   and the prologue area has been passed to map_exec(). */
478 static void code_gen_alloc(unsigned long tb_size)
480 #ifdef USE_STATIC_CODE_GEN_BUFFER
/* static buffer: fixed size, 'tb_size' is ignored */
481 code_gen_buffer = static_code_gen_buffer;
482 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
483 map_exec(code_gen_buffer, code_gen_buffer_size);
484 #else
485 code_gen_buffer_size = tb_size;
486 if (code_gen_buffer_size == 0) {
487 #if defined(CONFIG_USER_ONLY)
488 /* in user mode, phys_ram_size is not meaningful */
489 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
490 #else
491 /* XXX: needs adjustments */
492 code_gen_buffer_size = (unsigned long)(ram_size / 4);
493 #endif
/* enforce the lower bound on the buffer size */
495 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
496 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
497 /* The code gen buffer location may have constraints depending on
498 the host cpu and OS */
499 #if defined(__linux__)
501 int flags;
502 void *start = NULL;
504 flags = MAP_PRIVATE | MAP_ANONYMOUS;
505 #if defined(__x86_64__)
506 flags |= MAP_32BIT;
507 /* Cannot map more than that */
508 if (code_gen_buffer_size > (800 * 1024 * 1024))
509 code_gen_buffer_size = (800 * 1024 * 1024);
510 #elif defined(__sparc_v9__)
511 // Map the buffer below 2G, so we can use direct calls and branches
512 flags |= MAP_FIXED;
513 start = (void *) 0x60000000UL;
514 if (code_gen_buffer_size > (512 * 1024 * 1024))
515 code_gen_buffer_size = (512 * 1024 * 1024);
516 #elif defined(__arm__)
517 /* Map the buffer below 32M, so we can use direct calls and branches */
518 flags |= MAP_FIXED;
519 start = (void *) 0x01000000UL;
520 if (code_gen_buffer_size > 16 * 1024 * 1024)
521 code_gen_buffer_size = 16 * 1024 * 1024;
522 #elif defined(__s390x__)
523 /* Map the buffer so that we can use direct calls and branches. */
524 /* We have a +- 4GB range on the branches; leave some slop. */
525 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
526 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
/* no MAP_FIXED on this host: 'start' is passed to mmap() as a plain
   address argument */
528 start = (void *)0x90000000UL;
529 #endif
530 code_gen_buffer = mmap(start, code_gen_buffer_size,
531 PROT_WRITE | PROT_READ | PROT_EXEC,
532 flags, -1, 0);
/* failure to obtain the buffer is fatal */
533 if (code_gen_buffer == MAP_FAILED) {
534 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
535 exit(1);
538 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
540 int flags;
541 void *addr = NULL;
542 flags = MAP_PRIVATE | MAP_ANONYMOUS;
543 #if defined(__x86_64__)
544 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
545 * 0x40000000 is free */
546 flags |= MAP_FIXED;
547 addr = (void *)0x40000000;
548 /* Cannot map more than that */
549 if (code_gen_buffer_size > (800 * 1024 * 1024))
550 code_gen_buffer_size = (800 * 1024 * 1024);
551 #endif
552 code_gen_buffer = mmap(addr, code_gen_buffer_size,
553 PROT_WRITE | PROT_READ | PROT_EXEC,
554 flags, -1, 0);
555 if (code_gen_buffer == MAP_FAILED) {
556 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
557 exit(1);
560 #else
/* other hosts: heap allocation, then made executable via map_exec() */
561 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
562 map_exec(code_gen_buffer, code_gen_buffer_size);
563 #endif
564 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
565 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
/* flush threshold: buffer size minus TCG_MAX_OP_SIZE * OPC_MAX_SIZE
   bytes of headroom */
566 code_gen_buffer_max_size = code_gen_buffer_size -
567 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
/* size the TB array from the buffer size and the average block size */
568 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
569 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
572 /* Must be called before using the QEMU cpus. 'tb_size' is the size
573 (in bytes) allocated to the translation buffer. Zero means default
574 size. */
575 void cpu_exec_init_all(unsigned long tb_size)
577 cpu_gen_init();
578 code_gen_alloc(tb_size);
579 code_gen_ptr = code_gen_buffer;
580 page_init();
581 #if !defined(CONFIG_USER_ONLY)
582 io_mem_init();
583 #endif
584 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
585 /* There's no guest base to take into account, so go ahead and
586 initialize the prologue now. */
587 tcg_prologue_init(&tcg_ctx);
588 #endif
591 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
593 static int cpu_common_post_load(void *opaque, int version_id)
595 CPUState *env = opaque;
597 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
598 version_id is increased. */
599 env->interrupt_request &= ~0x01;
600 tlb_flush(env, 1);
602 return 0;
/* Description of the CPU fields saved under the name "cpu_common":
   'halted' and 'interrupt_request', both as 32-bit unsigned values.
   cpu_common_post_load() runs after loading. */
605 static const VMStateDescription vmstate_cpu_common = {
606 .name = "cpu_common",
607 .version_id = 1,
608 .minimum_version_id = 1,
609 .minimum_version_id_old = 1,
610 .post_load = cpu_common_post_load,
611 .fields = (VMStateField []) {
612 VMSTATE_UINT32(halted, CPUState),
613 VMSTATE_UINT32(interrupt_request, CPUState),
614 VMSTATE_END_OF_LIST()
617 #endif
619 CPUState *qemu_get_cpu(int cpu)
621 CPUState *env = first_cpu;
623 while (env) {
624 if (env->cpu_index == cpu)
625 break;
626 env = env->next_cpu;
629 return env;
632 void cpu_exec_init(CPUState *env)
634 CPUState **penv;
635 int cpu_index;
637 #if defined(CONFIG_USER_ONLY)
638 cpu_list_lock();
639 #endif
640 env->next_cpu = NULL;
641 penv = &first_cpu;
642 cpu_index = 0;
643 while (*penv != NULL) {
644 penv = &(*penv)->next_cpu;
645 cpu_index++;
647 env->cpu_index = cpu_index;
648 env->numa_node = 0;
649 QTAILQ_INIT(&env->breakpoints);
650 QTAILQ_INIT(&env->watchpoints);
651 *penv = env;
652 #if defined(CONFIG_USER_ONLY)
653 cpu_list_unlock();
654 #endif
655 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
656 vmstate_register(cpu_index, &vmstate_cpu_common, env);
657 register_savevm("cpu", cpu_index, CPU_SAVE_VERSION,
658 cpu_save, cpu_load, env);
659 #endif
662 static inline void invalidate_page_bitmap(PageDesc *p)
664 if (p->code_bitmap) {
665 qemu_free(p->code_bitmap);
666 p->code_bitmap = NULL;
668 p->code_write_count = 0;
671 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
673 static void page_flush_tb_1 (int level, void **lp)
675 int i;
677 if (*lp == NULL) {
678 return;
680 if (level == 0) {
681 PageDesc *pd = *lp;
682 for (i = 0; i < L2_SIZE; ++i) {
683 pd[i].first_tb = NULL;
684 invalidate_page_bitmap(pd + i);
686 } else {
687 void **pp = *lp;
688 for (i = 0; i < L2_SIZE; ++i) {
689 page_flush_tb_1 (level - 1, pp + i);
694 static void page_flush_tb(void)
696 int i;
697 for (i = 0; i < V_L1_SIZE; i++) {
698 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
702 /* flush all the translation blocks */
703 /* XXX: tb_flush is currently not thread safe */
704 void tb_flush(CPUState *env1)
706 CPUState *env;
707 #if defined(DEBUG_FLUSH)
708 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
709 (unsigned long)(code_gen_ptr - code_gen_buffer),
710 nb_tbs, nb_tbs > 0 ?
711 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
712 #endif
713 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
714 cpu_abort(env1, "Internal error: code buffer overflow\n");
716 nb_tbs = 0;
718 for(env = first_cpu; env != NULL; env = env->next_cpu) {
719 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
722 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
723 page_flush_tb();
725 code_gen_ptr = code_gen_buffer;
726 /* XXX: flush processor icache at this point if cache flush is
727 expensive */
728 tb_flush_count++;
731 #ifdef DEBUG_TB_CHECK
733 static void tb_invalidate_check(target_ulong address)
735 TranslationBlock *tb;
736 int i;
737 address &= TARGET_PAGE_MASK;
738 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
739 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
740 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
741 address >= tb->pc + tb->size)) {
742 printf("ERROR invalidate: address=" TARGET_FMT_lx
743 " PC=%08lx size=%04x\n",
744 address, (long)tb->pc, tb->size);
750 /* verify that all the pages have correct rights for code */
751 static void tb_page_check(void)
753 TranslationBlock *tb;
754 int i, flags1, flags2;
756 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
757 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
758 flags1 = page_get_flags(tb->pc);
759 flags2 = page_get_flags(tb->pc + tb->size - 1);
760 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
761 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
762 (long)tb->pc, tb->size, flags1, flags2);
768 #endif
770 /* invalidate one TB */
771 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
772 int next_offset)
774 TranslationBlock *tb1;
775 for(;;) {
776 tb1 = *ptb;
777 if (tb1 == tb) {
778 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
779 break;
781 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
785 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
787 TranslationBlock *tb1;
788 unsigned int n1;
790 for(;;) {
791 tb1 = *ptb;
792 n1 = (long)tb1 & 3;
793 tb1 = (TranslationBlock *)((long)tb1 & ~3);
794 if (tb1 == tb) {
795 *ptb = tb1->page_next[n1];
796 break;
798 ptb = &tb1->page_next[n1];
802 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
804 TranslationBlock *tb1, **ptb;
805 unsigned int n1;
807 ptb = &tb->jmp_next[n];
808 tb1 = *ptb;
809 if (tb1) {
810 /* find tb(n) in circular list */
811 for(;;) {
812 tb1 = *ptb;
813 n1 = (long)tb1 & 3;
814 tb1 = (TranslationBlock *)((long)tb1 & ~3);
815 if (n1 == n && tb1 == tb)
816 break;
817 if (n1 == 2) {
818 ptb = &tb1->jmp_first;
819 } else {
820 ptb = &tb1->jmp_next[n1];
823 /* now we can suppress tb(n) from the list */
824 *ptb = tb->jmp_next[n];
826 tb->jmp_next[n] = NULL;
830 /* reset the jump entry 'n' of a TB so that it is not chained to
831 another TB */
832 static inline void tb_reset_jump(TranslationBlock *tb, int n)
834 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
/* Remove 'tb' from every lookup structure: the physical hash table, the
   TB lists of the pages it occupies, each CPU's jump cache and the jump
   chaining lists.  A page whose address equals 'page_addr' is left
   alone: its list is not touched here.  Callers pass -1 to have both
   pages processed. */
837 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
839 CPUState *env;
840 PageDesc *p;
841 unsigned int h, n1;
842 tb_page_addr_t phys_pc;
843 TranslationBlock *tb1, *tb2;
845 /* remove the TB from the hash list */
846 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
847 h = tb_phys_hash_func(phys_pc);
848 tb_remove(&tb_phys_hash[h], tb,
849 offsetof(TranslationBlock, phys_hash_next));
851 /* remove the TB from the page list */
852 if (tb->page_addr[0] != page_addr) {
853 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
854 tb_page_remove(&p->first_tb, tb);
855 invalidate_page_bitmap(p);
/* page_addr[1] == -1 means the TB does not extend onto a second page */
857 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
858 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
859 tb_page_remove(&p->first_tb, tb);
860 invalidate_page_bitmap(p);
863 tb_invalidated_flag = 1;
865 /* remove the TB from each CPU's jump cache */
866 h = tb_jmp_cache_hash_func(tb->pc);
867 for(env = first_cpu; env != NULL; env = env->next_cpu) {
868 if (env->tb_jmp_cache[h] == tb)
869 env->tb_jmp_cache[h] = NULL;
872 /* suppress this TB from the two jump lists */
873 tb_jmp_remove(tb, 0);
874 tb_jmp_remove(tb, 1);
876 /* suppress any remaining jumps to this TB */
877 tb1 = tb->jmp_first;
878 for(;;) {
/* a link tagged 2 is the list terminator */
879 n1 = (long)tb1 & 3;
880 if (n1 == 2)
881 break;
882 tb1 = (TranslationBlock *)((long)tb1 & ~3);
/* fetch the successor before the slot is cleared below */
883 tb2 = tb1->jmp_next[n1];
884 tb_reset_jump(tb1, n1);
885 tb1->jmp_next[n1] = NULL;
886 tb1 = tb2;
888 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
890 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab'.  Bit i lives in
   byte i / 8 at position i % 8, least significant bit first. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *byte = tab + (start >> 3);
    const int head_mask = 0xff << (start & 7);

    if ((start & ~7) == (end & ~7)) {
        /* The whole range sits inside a single byte. */
        if (start < end) {
            *byte |= head_mask & ~(0xff << (end & 7));
        }
        return;
    }

    /* Partial leading byte, full middle bytes, partial trailing byte. */
    *byte++ |= head_mask;
    for (start = (start + 8) & ~7; start < (end & ~7); start += 8) {
        *byte++ = 0xff;
    }
    if (start < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
920 static void build_page_bitmap(PageDesc *p)
922 int n, tb_start, tb_end;
923 TranslationBlock *tb;
925 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
927 tb = p->first_tb;
928 while (tb != NULL) {
929 n = (long)tb & 3;
930 tb = (TranslationBlock *)((long)tb & ~3);
931 /* NOTE: this is subtle as a TB may span two physical pages */
932 if (n == 0) {
933 /* NOTE: tb_end may be after the end of the page, but
934 it is not a problem */
935 tb_start = tb->pc & ~TARGET_PAGE_MASK;
936 tb_end = tb_start + tb->size;
937 if (tb_end > TARGET_PAGE_SIZE)
938 tb_end = TARGET_PAGE_SIZE;
939 } else {
940 tb_start = 0;
941 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
943 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
944 tb = tb->page_next[n];
948 TranslationBlock *tb_gen_code(CPUState *env,
949 target_ulong pc, target_ulong cs_base,
950 int flags, int cflags)
952 TranslationBlock *tb;
953 uint8_t *tc_ptr;
954 tb_page_addr_t phys_pc, phys_page2;
955 target_ulong virt_page2;
956 int code_gen_size;
958 phys_pc = get_page_addr_code(env, pc);
959 tb = tb_alloc(pc);
960 if (!tb) {
961 /* flush must be done */
962 tb_flush(env);
963 /* cannot fail at this point */
964 tb = tb_alloc(pc);
965 /* Don't forget to invalidate previous TB info. */
966 tb_invalidated_flag = 1;
968 tc_ptr = code_gen_ptr;
969 tb->tc_ptr = tc_ptr;
970 tb->cs_base = cs_base;
971 tb->flags = flags;
972 tb->cflags = cflags;
973 cpu_gen_code(env, tb, &code_gen_size);
974 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
976 /* check next page if needed */
977 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
978 phys_page2 = -1;
979 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
980 phys_page2 = get_page_addr_code(env, virt_page2);
982 tb_link_page(tb, phys_pc, phys_page2);
983 return tb;
986 /* invalidate all TBs which intersect with the target physical page
987 starting in range [start;end[. NOTE: start and end must refer to
988 the same physical page. 'is_cpu_write_access' should be true if called
989 from a real cpu write access: the virtual CPU will exit the current
990 TB if code is modified inside this TB. */
991 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
992 int is_cpu_write_access)
994 TranslationBlock *tb, *tb_next, *saved_tb;
995 CPUState *env = cpu_single_env;
996 tb_page_addr_t tb_start, tb_end;
997 PageDesc *p;
998 int n;
999 #ifdef TARGET_HAS_PRECISE_SMC
1000 int current_tb_not_found = is_cpu_write_access;
1001 TranslationBlock *current_tb = NULL;
1002 int current_tb_modified = 0;
1003 target_ulong current_pc = 0;
1004 target_ulong current_cs_base = 0;
1005 int current_flags = 0;
1006 #endif /* TARGET_HAS_PRECISE_SMC */
/* nothing to do when no page descriptor exists for this page */
1008 p = page_find(start >> TARGET_PAGE_BITS);
1009 if (!p)
1010 return;
/* the write counter is bumped on every call that finds no bitmap; the
   bitmap is built once the threshold is reached on a cpu write access */
1011 if (!p->code_bitmap &&
1012 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1013 is_cpu_write_access) {
1014 /* build code bitmap */
1015 build_page_bitmap(p);
1018 /* we remove all the TBs in the range [start, end[ */
1019 /* XXX: see if in some cases it could be faster to invalidate all the code */
1020 tb = p->first_tb;
1021 while (tb != NULL) {
/* low two bits of the link select the page_next[] slot for this page */
1022 n = (long)tb & 3;
1023 tb = (TranslationBlock *)((long)tb & ~3);
/* read the successor first: tb may be unlinked below */
1024 tb_next = tb->page_next[n];
1025 /* NOTE: this is subtle as a TB may span two physical pages */
1026 if (n == 0) {
1027 /* NOTE: tb_end may be after the end of the page, but
1028 it is not a problem */
1029 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1030 tb_end = tb_start + tb->size;
1031 } else {
1032 tb_start = tb->page_addr[1];
1033 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
/* overlap test between [tb_start, tb_end[ and [start, end[ */
1035 if (!(tb_end <= start || tb_start >= end)) {
1036 #ifdef TARGET_HAS_PRECISE_SMC
/* resolve the currently executing TB at most once per call */
1037 if (current_tb_not_found) {
1038 current_tb_not_found = 0;
1039 current_tb = NULL;
1040 if (env->mem_io_pc) {
1041 /* now we have a real cpu fault */
1042 current_tb = tb_find_pc(env->mem_io_pc);
1045 if (current_tb == tb &&
1046 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1047 /* If we are modifying the current TB, we must stop
1048 its execution. We could be more precise by checking
1049 that the modification is after the current PC, but it
1050 would require a specialized function to partially
1051 restore the CPU state */
1053 current_tb_modified = 1;
1054 cpu_restore_state(current_tb, env,
1055 env->mem_io_pc, NULL);
1056 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1057 &current_flags);
1059 #endif /* TARGET_HAS_PRECISE_SMC */
1060 /* we need to do that to handle the case where a signal
1061 occurs while doing tb_phys_invalidate() */
1062 saved_tb = NULL;
1063 if (env) {
1064 saved_tb = env->current_tb;
1065 env->current_tb = NULL;
/* -1: unlink the TB from both of its pages */
1067 tb_phys_invalidate(tb, -1);
1068 if (env) {
1069 env->current_tb = saved_tb;
/* re-issue the pending interrupt request once current_tb is restored */
1070 if (env->interrupt_request && env->current_tb)
1071 cpu_interrupt(env, env->interrupt_request);
1074 tb = tb_next;
1076 #if !defined(CONFIG_USER_ONLY)
1077 /* if no code remaining, no need to continue to use slow writes */
1078 if (!p->first_tb) {
1079 invalidate_page_bitmap(p);
1080 if (is_cpu_write_access) {
1081 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1084 #endif
1085 #ifdef TARGET_HAS_PRECISE_SMC
1086 if (current_tb_modified) {
1087 /* we generate a block containing just the instruction
1088 modifying the memory. It will ensure that it cannot modify
1089 itself */
1090 env->current_tb = NULL;
1091 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1092 cpu_resume_from_signal(env, NULL);
1094 #endif
1097 /* len must be <= 8 and start must be a multiple of len */
1098 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1100 PageDesc *p;
1101 int offset, b;
1102 #if 0
1103 if (1) {
1104 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1105 cpu_single_env->mem_io_vaddr, len,
1106 cpu_single_env->eip,
1107 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1109 #endif
1110 p = page_find(start >> TARGET_PAGE_BITS);
1111 if (!p)
1112 return;
1113 if (p->code_bitmap) {
1114 offset = start & ~TARGET_PAGE_MASK;
1115 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1116 if (b & ((1 << len) - 1))
1117 goto do_invalidate;
1118 } else {
1119 do_invalidate:
1120 tb_invalidate_phys_page_range(start, start + len, 1);
1124 #if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB on the page containing 'addr' and empty that
   page's TB list.  'pc' and 'puc' are only used in the
   TARGET_HAS_PRECISE_SMC paths: 'pc' is passed to tb_find_pc() to
   identify the current TB (0 skips that lookup), and both are forwarded
   to cpu_restore_state(); 'puc' also goes to cpu_resume_from_signal(). */
1125 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1126 unsigned long pc, void *puc)
1128 TranslationBlock *tb;
1129 PageDesc *p;
1130 int n;
1131 #ifdef TARGET_HAS_PRECISE_SMC
1132 TranslationBlock *current_tb = NULL;
1133 CPUState *env = cpu_single_env;
1134 int current_tb_modified = 0;
1135 target_ulong current_pc = 0;
1136 target_ulong current_cs_base = 0;
1137 int current_flags = 0;
1138 #endif
/* work on the page base address; no descriptor means nothing to do */
1140 addr &= TARGET_PAGE_MASK;
1141 p = page_find(addr >> TARGET_PAGE_BITS);
1142 if (!p)
1143 return;
1144 tb = p->first_tb;
1145 #ifdef TARGET_HAS_PRECISE_SMC
1146 if (tb && pc != 0) {
1147 current_tb = tb_find_pc(pc);
1149 #endif
1150 while (tb != NULL) {
/* low two bits of the link select the page_next[] slot for this page */
1151 n = (long)tb & 3;
1152 tb = (TranslationBlock *)((long)tb & ~3);
1153 #ifdef TARGET_HAS_PRECISE_SMC
1154 if (current_tb == tb &&
1155 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1156 /* If we are modifying the current TB, we must stop
1157 its execution. We could be more precise by checking
1158 that the modification is after the current PC, but it
1159 would require a specialized function to partially
1160 restore the CPU state */
1162 current_tb_modified = 1;
1163 cpu_restore_state(current_tb, env, pc, puc);
1164 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1165 &current_flags);
1167 #endif /* TARGET_HAS_PRECISE_SMC */
/* passing 'addr' makes tb_phys_invalidate() leave this page's own list
   alone, so page_next[n] is still intact for the next step; the list
   head is cleared in one go after the loop */
1168 tb_phys_invalidate(tb, addr);
1169 tb = tb->page_next[n];
1171 p->first_tb = NULL;
1172 #ifdef TARGET_HAS_PRECISE_SMC
1173 if (current_tb_modified) {
1174 /* we generate a block containing just the instruction
1175 modifying the memory. It will ensure that it cannot modify
1176 itself */
1177 env->current_tb = NULL;
1178 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1179 cpu_resume_from_signal(env, puc);
1181 #endif
1183 #endif
1185 /* add the tb in the target page and protect it if necessary */
/* tb:        translation block being registered.
   n:         index (0 or 1 as used by tb_link_page) of the page slot of 'tb'
              that is being filled in.
   page_addr: address of the page; stored in tb->page_addr[n].

   The TB is pushed at the head of the page's TB list, with 'n' encoded in
   the low bits of the list pointer, and the page's code bitmap is
   invalidated.  In user-mode builds a writable page is then made
   non-writable (both in the page flags and with mprotect on the host page);
   otherwise tlb_protect_code() is called when this is the first TB of the
   page. */
1186 static inline void tb_alloc_page(TranslationBlock *tb,
1187 unsigned int n, tb_page_addr_t page_addr)
1189 PageDesc *p;
1190 TranslationBlock *last_first_tb;
1192 tb->page_addr[n] = page_addr;
1193 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1194 tb->page_next[n] = p->first_tb;
/* remember the previous list head: NULL means this is the page's first TB */
1195 last_first_tb = p->first_tb;
1196 p->first_tb = (TranslationBlock *)((long)tb | n);
1197 invalidate_page_bitmap(p);
1199 #if defined(TARGET_HAS_SMC) || 1
1201 #if defined(CONFIG_USER_ONLY)
1202 if (p->flags & PAGE_WRITE) {
1203 target_ulong addr;
1204 PageDesc *p2;
1205 int prot;
1207 /* force the host page as non writable (writes will have a
1208 page fault + mprotect overhead) */
1209 page_addr &= qemu_host_page_mask;
1210 prot = 0;
/* a host page may span several target pages: collect the union of their
   flags and clear PAGE_WRITE on each of them */
1211 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1212 addr += TARGET_PAGE_SIZE) {
1214 p2 = page_find (addr >> TARGET_PAGE_BITS);
1215 if (!p2)
1216 continue;
1217 prot |= p2->flags;
1218 p2->flags &= ~PAGE_WRITE;
1220 mprotect(g2h(page_addr), qemu_host_page_size,
1221 (prot & PAGE_BITS) & ~PAGE_WRITE);
1222 #ifdef DEBUG_TB_INVALIDATE
1223 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1224 page_addr);
1225 #endif
1227 #else
1228 /* if some code is already present, then the pages are already
1229 protected. So we handle the case where only the first TB is
1230 allocated in a physical page */
1231 if (!last_first_tb) {
1232 tlb_protect_code(page_addr);
1234 #endif
1236 #endif /* TARGET_HAS_SMC */
1239 /* Allocate a new translation block. Flush the translation buffer if
1240 too many translation blocks or too much generated code. */
1241 TranslationBlock *tb_alloc(target_ulong pc)
1243 TranslationBlock *tb;
1245 if (nb_tbs >= code_gen_max_blocks ||
1246 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
1247 return NULL;
1248 tb = &tbs[nb_tbs++];
1249 tb->pc = pc;
1250 tb->cflags = 0;
1251 return tb;
1254 void tb_free(TranslationBlock *tb)
1256 /* In practice this is mostly used for single use temporary TB
1257 Ignore the hard cases and just back up if this TB happens to
1258 be the last one generated. */
1259 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
1260 code_gen_ptr = tb->tc_ptr;
1261 nb_tbs--;
1265 /* add a new TB and link it to the physical page tables. phys_page2 is
1266 (-1) to indicate that only one page contains the TB. */
1267 void tb_link_page(TranslationBlock *tb,
1268 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1270 unsigned int h;
1271 TranslationBlock **ptb;
1273 /* Grab the mmap lock to stop another thread invalidating this TB
1274 before we are done. */
1275 mmap_lock();
1276 /* add in the physical hash table */
1277 h = tb_phys_hash_func(phys_pc);
1278 ptb = &tb_phys_hash[h];
1279 tb->phys_hash_next = *ptb;
1280 *ptb = tb;
1282 /* add in the page list */
1283 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1284 if (phys_page2 != -1)
1285 tb_alloc_page(tb, 1, phys_page2);
1286 else
1287 tb->page_addr[1] = -1;
1289 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1290 tb->jmp_next[0] = NULL;
1291 tb->jmp_next[1] = NULL;
1293 /* init original jump addresses */
1294 if (tb->tb_next_offset[0] != 0xffff)
1295 tb_reset_jump(tb, 0);
1296 if (tb->tb_next_offset[1] != 0xffff)
1297 tb_reset_jump(tb, 1);
1299 #ifdef DEBUG_TB_CHECK
1300 tb_page_check();
1301 #endif
1302 mmap_unlock();
1305 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1306 tb[1].tc_ptr. Return NULL if not found */
1307 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1309 int m_min, m_max, m;
1310 unsigned long v;
1311 TranslationBlock *tb;
1313 if (nb_tbs <= 0)
1314 return NULL;
1315 if (tc_ptr < (unsigned long)code_gen_buffer ||
1316 tc_ptr >= (unsigned long)code_gen_ptr)
1317 return NULL;
1318 /* binary search (cf Knuth) */
1319 m_min = 0;
1320 m_max = nb_tbs - 1;
1321 while (m_min <= m_max) {
1322 m = (m_min + m_max) >> 1;
1323 tb = &tbs[m];
1324 v = (unsigned long)tb->tc_ptr;
1325 if (v == tc_ptr)
1326 return tb;
1327 else if (tc_ptr < v) {
1328 m_max = m - 1;
1329 } else {
1330 m_min = m + 1;
1333 return &tbs[m_max];
1336 static void tb_reset_jump_recursive(TranslationBlock *tb);
/* Undo the direct chaining of outgoing jump 'n' of 'tb', then recurse into
   the TB it was chained to.

   tb->jmp_next[n] is non-NULL only when jump 'n' is chained.  The low two
   bits of the pointers in these lists are tags: following jmp_next[tag]
   eventually reaches a pointer tagged 2, which designates the destination
   TB itself (tb_link_page initialises jmp_first as "tb | 2").  Once the
   destination is known, 'tb' is unlinked from the destination's jmp_first
   list, its generated jump is reset, and the destination's own outgoing
   jumps are unchained recursively. */
1338 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1340 TranslationBlock *tb1, *tb_next, **ptb;
1341 unsigned int n1;
1343 tb1 = tb->jmp_next[n];
1344 if (tb1 != NULL) {
1345 /* find head of list */
1346 for(;;) {
1347 n1 = (long)tb1 & 3;
1348 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1349 if (n1 == 2)
1350 break;
1351 tb1 = tb1->jmp_next[n1];
1353 /* we are now sure that tb jumps to tb1 */
1354 tb_next = tb1;
1356 /* remove tb from the jmp_first list */
1357 ptb = &tb_next->jmp_first;
1358 for(;;) {
1359 tb1 = *ptb;
1360 n1 = (long)tb1 & 3;
1361 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1362 if (n1 == n && tb1 == tb)
1363 break;
1364 ptb = &tb1->jmp_next[n1];
/* splice the entry for (tb, n) out of the list */
1366 *ptb = tb->jmp_next[n];
1367 tb->jmp_next[n] = NULL;
1369 /* suppress the jump to next tb in generated code */
1370 tb_reset_jump(tb, n);
1372 /* suppress jumps in the tb on which we could have jumped */
1373 tb_reset_jump_recursive(tb_next);
1377 static void tb_reset_jump_recursive(TranslationBlock *tb)
1379 tb_reset_jump_recursive2(tb, 0);
1380 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Discard the translated code covering guest address 'pc' so that a
   breakpoint change at that address takes effect.  In user-mode builds the
   guest address is passed to the invalidation routine as is. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Discard the translated code covering guest address 'pc' so that a
   breakpoint change at that address takes effect.  The virtual address is
   first translated with cpu_get_phys_page_debug() and the page descriptor's
   phys_offset is combined with the in-page offset of 'pc'. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    target_phys_addr_t phys = cpu_get_phys_page_debug(env, pc);
    PhysPageDesc *desc = phys_page_find(phys >> TARGET_PAGE_BITS);
    target_ulong pd = desc ? desc->phys_offset : IO_MEM_UNASSIGNED;
    ram_addr_t ram_addr;

    ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1410 #if defined(CONFIG_USER_ONLY)
1411 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1416 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1417 int flags, CPUWatchpoint **watchpoint)
1419 return -ENOSYS;
1421 #else
1422 /* Add a watchpoint. */
1423 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1424 int flags, CPUWatchpoint **watchpoint)
1426 target_ulong len_mask = ~(len - 1);
1427 CPUWatchpoint *wp;
1429 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1430 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1431 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1432 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1433 return -EINVAL;
1435 wp = qemu_malloc(sizeof(*wp));
1437 wp->vaddr = addr;
1438 wp->len_mask = len_mask;
1439 wp->flags = flags;
1441 /* keep all GDB-injected watchpoints in front */
1442 if (flags & BP_GDB)
1443 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1444 else
1445 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1447 tlb_flush_page(env, addr);
1449 if (watchpoint)
1450 *watchpoint = wp;
1451 return 0;
1454 /* Remove a specific watchpoint. */
1455 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1456 int flags)
1458 target_ulong len_mask = ~(len - 1);
1459 CPUWatchpoint *wp;
1461 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1462 if (addr == wp->vaddr && len_mask == wp->len_mask
1463 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1464 cpu_watchpoint_remove_by_ref(env, wp);
1465 return 0;
1468 return -ENOENT;
1471 /* Remove a specific watchpoint by reference. */
1472 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1474 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1476 tlb_flush_page(env, watchpoint->vaddr);
1478 qemu_free(watchpoint);
1481 /* Remove all matching watchpoints. */
1482 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1484 CPUWatchpoint *wp, *next;
1486 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1487 if (wp->flags & mask)
1488 cpu_watchpoint_remove_by_ref(env, wp);
1491 #endif
1493 /* Add a breakpoint. */
1494 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1495 CPUBreakpoint **breakpoint)
1497 #if defined(TARGET_HAS_ICE)
1498 CPUBreakpoint *bp;
1500 bp = qemu_malloc(sizeof(*bp));
1502 bp->pc = pc;
1503 bp->flags = flags;
1505 /* keep all GDB-injected breakpoints in front */
1506 if (flags & BP_GDB)
1507 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1508 else
1509 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1511 breakpoint_invalidate(env, pc);
1513 if (breakpoint)
1514 *breakpoint = bp;
1515 return 0;
1516 #else
1517 return -ENOSYS;
1518 #endif
1521 /* Remove a specific breakpoint. */
1522 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1524 #if defined(TARGET_HAS_ICE)
1525 CPUBreakpoint *bp;
1527 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1528 if (bp->pc == pc && bp->flags == flags) {
1529 cpu_breakpoint_remove_by_ref(env, bp);
1530 return 0;
1533 return -ENOENT;
1534 #else
1535 return -ENOSYS;
1536 #endif
1539 /* Remove a specific breakpoint by reference. */
1540 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1542 #if defined(TARGET_HAS_ICE)
1543 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1545 breakpoint_invalidate(env, breakpoint->pc);
1547 qemu_free(breakpoint);
1548 #endif
1551 /* Remove all matching breakpoints. */
1552 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1554 #if defined(TARGET_HAS_ICE)
1555 CPUBreakpoint *bp, *next;
1557 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1558 if (bp->flags & mask)
1559 cpu_breakpoint_remove_by_ref(env, bp);
1561 #endif
1564 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1565 CPU loop after each instruction */
1566 void cpu_single_step(CPUState *env, int enabled)
1568 #if defined(TARGET_HAS_ICE)
1569 if (env->singlestep_enabled != enabled) {
1570 env->singlestep_enabled = enabled;
1571 if (kvm_enabled())
1572 kvm_update_guest_debug(env, 0);
1573 else {
1574 /* must flush all the translated code to avoid inconsistencies */
1575 /* XXX: only flush what is necessary */
1576 tb_flush(env);
1579 #endif
1582 /* enable or disable low levels log */
1583 void cpu_set_log(int log_flags)
1585 loglevel = log_flags;
1586 if (loglevel && !logfile) {
1587 logfile = fopen(logfilename, log_append ? "a" : "w");
1588 if (!logfile) {
1589 perror(logfilename);
1590 _exit(1);
1592 #if !defined(CONFIG_SOFTMMU)
1593 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1595 static char logfile_buf[4096];
1596 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1598 #elif !defined(_WIN32)
1599 /* Win32 doesn't support line-buffering and requires size >= 2 */
1600 setvbuf(logfile, NULL, _IOLBF, 0);
1601 #endif
1602 log_append = 1;
1604 if (!loglevel && logfile) {
1605 fclose(logfile);
1606 logfile = NULL;
1610 void cpu_set_log_filename(const char *filename)
1612 logfilename = strdup(filename);
1613 if (logfile) {
1614 fclose(logfile);
1615 logfile = NULL;
1617 cpu_set_log(loglevel);
1620 static void cpu_unlink_tb(CPUState *env)
1622 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1623 problem and hope the cpu will stop of its own accord. For userspace
1624 emulation this often isn't actually as bad as it sounds. Often
1625 signals are used primarily to interrupt blocking syscalls. */
1626 TranslationBlock *tb;
1627 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1629 spin_lock(&interrupt_lock);
1630 tb = env->current_tb;
1631 /* if the cpu is currently executing code, we must unlink it and
1632 all the potentially executing TB */
1633 if (tb) {
1634 env->current_tb = NULL;
1635 tb_reset_jump_recursive(tb);
1637 spin_unlock(&interrupt_lock);
1640 /* mask must never be zero, except for A20 change call */
1641 void cpu_interrupt(CPUState *env, int mask)
1643 int old_mask;
1645 old_mask = env->interrupt_request;
1646 env->interrupt_request |= mask;
1648 #ifndef CONFIG_USER_ONLY
1650 * If called from iothread context, wake the target cpu in
1651 * case its halted.
1653 if (!qemu_cpu_self(env)) {
1654 qemu_cpu_kick(env);
1655 return;
1657 #endif
1659 if (use_icount) {
1660 env->icount_decr.u16.high = 0xffff;
1661 #ifndef CONFIG_USER_ONLY
1662 if (!can_do_io(env)
1663 && (mask & ~old_mask) != 0) {
1664 cpu_abort(env, "Raised interrupt while not in I/O function");
1666 #endif
1667 } else {
1668 cpu_unlink_tb(env);
1672 void cpu_reset_interrupt(CPUState *env, int mask)
1674 env->interrupt_request &= ~mask;
1677 void cpu_exit(CPUState *env)
1679 env->exit_request = 1;
1680 cpu_unlink_tb(env);
/* Table of the selectable log categories: { mask bit, name, help text }.
   The name is what cpu_str_to_log_mask() matches against.  The table is
   terminated by an entry whose mask is 0; some entries exist only for
   TARGET_I386 or when DEBUG_IOPORT is defined. */
1683 const CPULogItem cpu_log_items[] = {
1684 { CPU_LOG_TB_OUT_ASM, "out_asm",
1685 "show generated host assembly code for each compiled TB" },
1686 { CPU_LOG_TB_IN_ASM, "in_asm",
1687 "show target assembly code for each compiled TB" },
1688 { CPU_LOG_TB_OP, "op",
1689 "show micro ops for each compiled TB" },
1690 { CPU_LOG_TB_OP_OPT, "op_opt",
1691 "show micro ops "
1692 #ifdef TARGET_I386
1693 "before eflags optimization and "
1694 #endif
1695 "after liveness analysis" },
1696 { CPU_LOG_INT, "int",
1697 "show interrupts/exceptions in short format" },
1698 { CPU_LOG_EXEC, "exec",
1699 "show trace before each executed TB (lots of logs)" },
1700 { CPU_LOG_TB_CPU, "cpu",
1701 "show CPU state before block translation" },
1702 #ifdef TARGET_I386
1703 { CPU_LOG_PCALL, "pcall",
1704 "show protected mode far calls/returns/exceptions" },
1705 { CPU_LOG_RESET, "cpu_reset",
1706 "show CPU state before CPU resets" },
1707 #endif
1708 #ifdef DEBUG_IOPORT
1709 { CPU_LOG_IOPORT, "ioport",
1710 "show all i/o ports accesses" },
1711 #endif
/* end-of-table sentinel */
1712 { 0, NULL, NULL },
1715 #ifndef CONFIG_USER_ONLY
/* List of the registered CPUPhysMemoryClient objects.  Entries are added by
   cpu_register_phys_memory_client() and walked by the cpu_notify_*()
   helpers. */
1716 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1717 = QLIST_HEAD_INITIALIZER(memory_client_list);
1719 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1720 ram_addr_t size,
1721 ram_addr_t phys_offset)
1723 CPUPhysMemoryClient *client;
1724 QLIST_FOREACH(client, &memory_client_list, list) {
1725 client->set_memory(client, start_addr, size, phys_offset);
1729 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1730 target_phys_addr_t end)
1732 CPUPhysMemoryClient *client;
1733 QLIST_FOREACH(client, &memory_client_list, list) {
1734 int r = client->sync_dirty_bitmap(client, start, end);
1735 if (r < 0)
1736 return r;
1738 return 0;
1741 static int cpu_notify_migration_log(int enable)
1743 CPUPhysMemoryClient *client;
1744 QLIST_FOREACH(client, &memory_client_list, list) {
1745 int r = client->migration_log(client, enable);
1746 if (r < 0)
1747 return r;
1749 return 0;
1752 static void phys_page_for_each_1(CPUPhysMemoryClient *client,
1753 int level, void **lp)
1755 int i;
1757 if (*lp == NULL) {
1758 return;
1760 if (level == 0) {
1761 PhysPageDesc *pd = *lp;
1762 for (i = 0; i < L2_SIZE; ++i) {
1763 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1764 client->set_memory(client, pd[i].region_offset,
1765 TARGET_PAGE_SIZE, pd[i].phys_offset);
1768 } else {
1769 void **pp = *lp;
1770 for (i = 0; i < L2_SIZE; ++i) {
1771 phys_page_for_each_1(client, level - 1, pp + i);
1776 static void phys_page_for_each(CPUPhysMemoryClient *client)
1778 int i;
1779 for (i = 0; i < P_L1_SIZE; ++i) {
1780 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1781 l1_phys_map + 1);
1785 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1787 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1788 phys_page_for_each(client);
1791 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1793 QLIST_REMOVE(client, list);
1795 #endif
/* Compare the first 'n' bytes of 's1' (not necessarily NUL-terminated at
   that point) with the whole NUL-terminated string 's2'.
   Returns 1 when 's2' is exactly 'n' bytes long and those bytes match,
   0 otherwise. */
static int cmp1(const char *s1, int n, const char *s2)
{
    if (strlen(s2) == n && memcmp(s1, s2, n) == 0) {
        return 1;
    }
    return 0;
}
1804 /* takes a comma separated list of log masks. Return 0 if error. */
1805 int cpu_str_to_log_mask(const char *str)
1807 const CPULogItem *item;
1808 int mask;
1809 const char *p, *p1;
1811 p = str;
1812 mask = 0;
1813 for(;;) {
1814 p1 = strchr(p, ',');
1815 if (!p1)
1816 p1 = p + strlen(p);
1817 if(cmp1(p,p1-p,"all")) {
1818 for(item = cpu_log_items; item->mask != 0; item++) {
1819 mask |= item->mask;
1821 } else {
1822 for(item = cpu_log_items; item->mask != 0; item++) {
1823 if (cmp1(p, p1 - p, item->name))
1824 goto found;
1826 return 0;
1828 found:
1829 mask |= item->mask;
1830 if (*p1 != ',')
1831 break;
1832 p = p1 + 1;
1834 return mask;
1837 void cpu_abort(CPUState *env, const char *fmt, ...)
1839 va_list ap;
1840 va_list ap2;
1842 va_start(ap, fmt);
1843 va_copy(ap2, ap);
1844 fprintf(stderr, "qemu: fatal: ");
1845 vfprintf(stderr, fmt, ap);
1846 fprintf(stderr, "\n");
1847 #ifdef TARGET_I386
1848 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1849 #else
1850 cpu_dump_state(env, stderr, fprintf, 0);
1851 #endif
1852 if (qemu_log_enabled()) {
1853 qemu_log("qemu: fatal: ");
1854 qemu_log_vprintf(fmt, ap2);
1855 qemu_log("\n");
1856 #ifdef TARGET_I386
1857 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1858 #else
1859 log_cpu_state(env, 0);
1860 #endif
1861 qemu_log_flush();
1862 qemu_log_close();
1864 va_end(ap2);
1865 va_end(ap);
1866 #if defined(CONFIG_USER_ONLY)
1868 struct sigaction act;
1869 sigfillset(&act.sa_mask);
1870 act.sa_handler = SIG_DFL;
1871 sigaction(SIGABRT, &act, NULL);
1873 #endif
1874 abort();
1877 CPUState *cpu_copy(CPUState *env)
1879 CPUState *new_env = cpu_init(env->cpu_model_str);
1880 CPUState *next_cpu = new_env->next_cpu;
1881 int cpu_index = new_env->cpu_index;
1882 #if defined(TARGET_HAS_ICE)
1883 CPUBreakpoint *bp;
1884 CPUWatchpoint *wp;
1885 #endif
1887 memcpy(new_env, env, sizeof(CPUState));
1889 /* Preserve chaining and index. */
1890 new_env->next_cpu = next_cpu;
1891 new_env->cpu_index = cpu_index;
1893 /* Clone all break/watchpoints.
1894 Note: Once we support ptrace with hw-debug register access, make sure
1895 BP_CPU break/watchpoints are handled correctly on clone. */
1896 QTAILQ_INIT(&env->breakpoints);
1897 QTAILQ_INIT(&env->watchpoints);
1898 #if defined(TARGET_HAS_ICE)
1899 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1900 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1902 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1903 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1904 wp->flags, NULL);
1906 #endif
1908 return new_env;
1911 #if !defined(CONFIG_USER_ONLY)
1913 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1915 unsigned int i;
1917 /* Discard jump cache entries for any tb which might potentially
1918 overlap the flushed page. */
1919 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1920 memset (&env->tb_jmp_cache[i], 0,
1921 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1923 i = tb_jmp_cache_hash_page(addr);
1924 memset (&env->tb_jmp_cache[i], 0,
1925 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
/* Template of an unused TLB entry: every address field is -1.  It is copied
   over entries by tlb_flush() and tlb_flush_entry() to invalidate them. */
1928 static CPUTLBEntry s_cputlb_empty_entry = {
1929 .addr_read = -1,
1930 .addr_write = -1,
1931 .addr_code = -1,
1932 .addend = -1,
1935 /* NOTE: if flush_global is true, also flush global entries (not
1936 implemented yet) */
1937 void tlb_flush(CPUState *env, int flush_global)
1939 int i;
1941 #if defined(DEBUG_TLB)
1942 printf("tlb_flush:\n");
1943 #endif
1944 /* must reset current TB so that interrupts cannot modify the
1945 links while we are modifying them */
1946 env->current_tb = NULL;
1948 for(i = 0; i < CPU_TLB_SIZE; i++) {
1949 int mmu_idx;
1950 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1951 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1955 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1957 env->tlb_flush_addr = -1;
1958 env->tlb_flush_mask = 0;
1959 tlb_flush_count++;
1962 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1964 if (addr == (tlb_entry->addr_read &
1965 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1966 addr == (tlb_entry->addr_write &
1967 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1968 addr == (tlb_entry->addr_code &
1969 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1970 *tlb_entry = s_cputlb_empty_entry;
1974 void tlb_flush_page(CPUState *env, target_ulong addr)
1976 int i;
1977 int mmu_idx;
1979 #if defined(DEBUG_TLB)
1980 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1981 #endif
1982 /* Check if we need to flush due to large pages. */
1983 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1984 #if defined(DEBUG_TLB)
1985 printf("tlb_flush_page: forced full flush ("
1986 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
1987 env->tlb_flush_addr, env->tlb_flush_mask);
1988 #endif
1989 tlb_flush(env, 1);
1990 return;
1992 /* must reset current TB so that interrupts cannot modify the
1993 links while we are modifying them */
1994 env->current_tb = NULL;
1996 addr &= TARGET_PAGE_MASK;
1997 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
1998 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
1999 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2001 tlb_flush_jmp_cache(env, addr);
2004 /* update the TLBs so that writes to code in the virtual page 'addr'
2005 can be detected */
2006 static void tlb_protect_code(ram_addr_t ram_addr)
2008 cpu_physical_memory_reset_dirty(ram_addr,
2009 ram_addr + TARGET_PAGE_SIZE,
2010 CODE_DIRTY_FLAG);
2013 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2014 tested for self modifying code */
2015 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2016 target_ulong vaddr)
2018 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2021 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2022 unsigned long start, unsigned long length)
2024 unsigned long addr;
2025 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2026 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2027 if ((addr - start) < length) {
2028 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2033 /* Note: start and end must be within the same ram block. */
2034 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2035 int dirty_flags)
2037 CPUState *env;
2038 unsigned long length, start1;
2039 int i;
2041 start &= TARGET_PAGE_MASK;
2042 end = TARGET_PAGE_ALIGN(end);
2044 length = end - start;
2045 if (length == 0)
2046 return;
2047 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2049 /* we modify the TLB cache so that the dirty bit will be set again
2050 when accessing the range */
2051 start1 = (unsigned long)qemu_get_ram_ptr(start);
2052 /* Chek that we don't span multiple blocks - this breaks the
2053 address comparisons below. */
2054 if ((unsigned long)qemu_get_ram_ptr(end - 1) - start1
2055 != (end - 1) - start) {
2056 abort();
2059 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2060 int mmu_idx;
2061 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2062 for(i = 0; i < CPU_TLB_SIZE; i++)
2063 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2064 start1, length);
2069 int cpu_physical_memory_set_dirty_tracking(int enable)
2071 int ret = 0;
2072 in_migration = enable;
2073 ret = cpu_notify_migration_log(!!enable);
2074 return ret;
2077 int cpu_physical_memory_get_dirty_tracking(void)
2079 return in_migration;
2082 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2083 target_phys_addr_t end_addr)
2085 int ret;
2087 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2088 return ret;
2091 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2093 ram_addr_t ram_addr;
2094 void *p;
2096 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2097 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2098 + tlb_entry->addend);
2099 ram_addr = qemu_ram_addr_from_host(p);
2100 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2101 tlb_entry->addr_write |= TLB_NOTDIRTY;
2106 /* update the TLB according to the current state of the dirty bits */
2107 void cpu_tlb_update_dirty(CPUState *env)
2109 int i;
2110 int mmu_idx;
2111 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2112 for(i = 0; i < CPU_TLB_SIZE; i++)
2113 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2117 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2119 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2120 tlb_entry->addr_write = vaddr;
2123 /* update the TLB corresponding to virtual page vaddr
2124 so that it is no longer dirty */
2125 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2127 int i;
2128 int mmu_idx;
2130 vaddr &= TARGET_PAGE_MASK;
2131 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2132 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2133 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2136 /* Our TLB does not support large pages, so remember the area covered by
2137 large pages and trigger a full TLB flush if these are invalidated. */
2138 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2139 target_ulong size)
2141 target_ulong mask = ~(size - 1);
2143 if (env->tlb_flush_addr == (target_ulong)-1) {
2144 env->tlb_flush_addr = vaddr & mask;
2145 env->tlb_flush_mask = mask;
2146 return;
2148 /* Extend the existing region to include the new page.
2149 This is a compromise between unnecessary flushes and the cost
2150 of maintaining a full variable size TLB. */
2151 mask &= env->tlb_flush_mask;
2152 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2153 mask <<= 1;
2155 env->tlb_flush_addr &= mask;
2156 env->tlb_flush_mask = mask;
2159 /* Add a new TLB entry. At most one entry for a given virtual address
2160 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2161 supplied size is only used by tlb_flush_page. */
/* env:     CPU whose TLB is updated.
   vaddr:   guest virtual address of the page.
   paddr:   guest physical address it maps to.
   prot:    PAGE_READ / PAGE_WRITE / PAGE_EXEC bits; an access kind that is
            not permitted gets -1 as its comparison address.
   mmu_idx: MMU mode whose table receives the entry.
   size:    size of the mapping, must be at least TARGET_PAGE_SIZE; larger
            sizes are recorded through tlb_add_large_page(). */
2162 void tlb_set_page(CPUState *env, target_ulong vaddr,
2163 target_phys_addr_t paddr, int prot,
2164 int mmu_idx, target_ulong size)
2166 PhysPageDesc *p;
2167 unsigned long pd;
2168 unsigned int index;
2169 target_ulong address;
2170 target_ulong code_address;
2171 unsigned long addend;
2172 CPUTLBEntry *te;
2173 CPUWatchpoint *wp;
2174 target_phys_addr_t iotlb;
2176 assert(size >= TARGET_PAGE_SIZE);
2177 if (size != TARGET_PAGE_SIZE) {
2178 tlb_add_large_page(env, vaddr, size);
/* look up the physical page; an unknown page is treated as unassigned */
2180 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2181 if (!p) {
2182 pd = IO_MEM_UNASSIGNED;
2183 } else {
2184 pd = p->phys_offset;
2186 #if defined(DEBUG_TLB)
2187 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x idx=%d smmu=%d pd=0x%08lx\n",
2188 vaddr, (int)paddr, prot, mmu_idx, is_softmmu, pd);
2189 #endif
2191 address = vaddr;
2192 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2193 /* IO memory case (romd handled later) */
2194 address |= TLB_MMIO;
2196 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2197 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2198 /* Normal RAM. */
2199 iotlb = pd & TARGET_PAGE_MASK;
2200 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2201 iotlb |= IO_MEM_NOTDIRTY;
2202 else
2203 iotlb |= IO_MEM_ROM;
2204 } else {
2205 /* IO handlers are currently passed a physical address.
2206 It would be nice to pass an offset from the base address
2207 of that region. This would avoid having to special case RAM,
2208 and avoid full address decoding in every device.
2209 We can't use the high bits of pd for this because
2210 IO_MEM_ROMD uses these as a ram address. */
2211 iotlb = (pd & ~TARGET_PAGE_MASK);
2212 if (p) {
2213 iotlb += p->region_offset;
2214 } else {
2215 iotlb += paddr;
/* the code address is captured before the watchpoint check below, so it
   never receives the TLB_MMIO bit added for watchpoints */
2219 code_address = address;
2220 /* Make accesses to pages with watchpoints go via the
2221 watchpoint trap routines. */
2222 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2223 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2224 iotlb = io_mem_watch + paddr;
2225 /* TODO: The memory case can be optimized by not trapping
2226 reads of pages with a write breakpoint. */
2227 address |= TLB_MMIO;
/* iotlb and addend are stored relative to vaddr */
2231 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2232 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2233 te = &env->tlb_table[mmu_idx][index];
2234 te->addend = addend - vaddr;
2235 if (prot & PAGE_READ) {
2236 te->addr_read = address;
2237 } else {
2238 te->addr_read = -1;
2241 if (prot & PAGE_EXEC) {
2242 te->addr_code = code_address;
2243 } else {
2244 te->addr_code = -1;
2246 if (prot & PAGE_WRITE) {
2247 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2248 (pd & IO_MEM_ROMD)) {
2249 /* Write access calls the I/O callback. */
2250 te->addr_write = address | TLB_MMIO;
2251 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2252 !cpu_physical_memory_is_dirty(pd)) {
2253 te->addr_write = address | TLB_NOTDIRTY;
2254 } else {
2255 te->addr_write = address;
2257 } else {
2258 te->addr_write = -1;
2262 #else
2264 void tlb_flush(CPUState *env, int flush_global)
2268 void tlb_flush_page(CPUState *env, target_ulong addr)
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */
2277 struct walk_memory_regions_data
2279 walk_memory_regions_fn fn;
2280 void *priv;
2281 unsigned long start;
2282 int prot;
2285 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2286 abi_ulong end, int new_prot)
2288 if (data->start != -1ul) {
2289 int rc = data->fn(data->priv, data->start, end, data->prot);
2290 if (rc != 0) {
2291 return rc;
2295 data->start = (new_prot ? end : -1ul);
2296 data->prot = new_prot;
2298 return 0;
2301 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2302 abi_ulong base, int level, void **lp)
2304 abi_ulong pa;
2305 int i, rc;
2307 if (*lp == NULL) {
2308 return walk_memory_regions_end(data, base, 0);
2311 if (level == 0) {
2312 PageDesc *pd = *lp;
2313 for (i = 0; i < L2_SIZE; ++i) {
2314 int prot = pd[i].flags;
2316 pa = base | (i << TARGET_PAGE_BITS);
2317 if (prot != data->prot) {
2318 rc = walk_memory_regions_end(data, pa, prot);
2319 if (rc != 0) {
2320 return rc;
2324 } else {
2325 void **pp = *lp;
2326 for (i = 0; i < L2_SIZE; ++i) {
2327 pa = base | ((abi_ulong)i <<
2328 (TARGET_PAGE_BITS + L2_BITS * level));
2329 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2330 if (rc != 0) {
2331 return rc;
2336 return 0;
2339 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2341 struct walk_memory_regions_data data;
2342 unsigned long i;
2344 data.fn = fn;
2345 data.priv = priv;
2346 data.start = -1ul;
2347 data.prot = 0;
2349 for (i = 0; i < V_L1_SIZE; i++) {
2350 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2351 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2352 if (rc != 0) {
2353 return rc;
2357 return walk_memory_regions_end(&data, 0, 0);
2360 static int dump_region(void *priv, abi_ulong start,
2361 abi_ulong end, unsigned long prot)
2363 FILE *f = (FILE *)priv;
2365 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2366 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2367 start, end, end - start,
2368 ((prot & PAGE_READ) ? 'r' : '-'),
2369 ((prot & PAGE_WRITE) ? 'w' : '-'),
2370 ((prot & PAGE_EXEC) ? 'x' : '-'));
2372 return (0);
2375 /* dump memory mappings */
2376 void page_dump(FILE *f)
2378 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2379 "start", "end", "size", "prot");
2380 walk_memory_regions(f, dump_region);
2383 int page_get_flags(target_ulong address)
2385 PageDesc *p;
2387 p = page_find(address >> TARGET_PAGE_BITS);
2388 if (!p)
2389 return 0;
2390 return p->flags;
2393 /* Modify the flags of a page and invalidate the code if necessary.
2394 The flag PAGE_WRITE_ORG is positioned automatically depending
2395 on PAGE_WRITE. The mmap_lock should already be held. */
2396 void page_set_flags(target_ulong start, target_ulong end, int flags)
2398 target_ulong addr, len;
2400 /* This function should never be called with addresses outside the
2401 guest address space. If this assert fires, it probably indicates
2402 a missing call to h2g_valid. */
2403 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2404 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2405 #endif
2406 assert(start < end);
2408 start = start & TARGET_PAGE_MASK;
2409 end = TARGET_PAGE_ALIGN(end);
2411 if (flags & PAGE_WRITE) {
2412 flags |= PAGE_WRITE_ORG;
2415 for (addr = start, len = end - start;
2416 len != 0;
2417 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2418 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2420 /* If the write protection bit is set, then we invalidate
2421 the code inside. */
2422 if (!(p->flags & PAGE_WRITE) &&
2423 (flags & PAGE_WRITE) &&
2424 p->first_tb) {
2425 tb_invalidate_phys_page(addr, 0, NULL);
2427 p->flags = flags;
2431 int page_check_range(target_ulong start, target_ulong len, int flags)
2433 PageDesc *p;
2434 target_ulong end;
2435 target_ulong addr;
2437 /* This function should never be called with addresses outside the
2438 guest address space. If this assert fires, it probably indicates
2439 a missing call to h2g_valid. */
2440 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2441 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2442 #endif
2444 if (len == 0) {
2445 return 0;
2447 if (start + len - 1 < start) {
2448 /* We've wrapped around. */
2449 return -1;
2452 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2453 start = start & TARGET_PAGE_MASK;
2455 for (addr = start, len = end - start;
2456 len != 0;
2457 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2458 p = page_find(addr >> TARGET_PAGE_BITS);
2459 if( !p )
2460 return -1;
2461 if( !(p->flags & PAGE_VALID) )
2462 return -1;
2464 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2465 return -1;
2466 if (flags & PAGE_WRITE) {
2467 if (!(p->flags & PAGE_WRITE_ORG))
2468 return -1;
2469 /* unprotect the page if it was put read-only because it
2470 contains translated code */
2471 if (!(p->flags & PAGE_WRITE)) {
2472 if (!page_unprotect(addr, 0, NULL))
2473 return -1;
2475 return 0;
2478 return 0;
2481 /* called from signal handler: invalidate the code and unprotect the
2482 page. Return TRUE if the fault was successfully handled. */
2483 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2485 unsigned int prot;
2486 PageDesc *p;
2487 target_ulong host_start, host_end, addr;
2489 /* Technically this isn't safe inside a signal handler. However we
2490 know this only ever happens in a synchronous SEGV handler, so in
2491 practice it seems to be ok. */
2492 mmap_lock();
2494 p = page_find(address >> TARGET_PAGE_BITS);
2495 if (!p) {
2496 mmap_unlock();
2497 return 0;
2500 /* if the page was really writable, then we change its
2501 protection back to writable */
2502 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2503 host_start = address & qemu_host_page_mask;
2504 host_end = host_start + qemu_host_page_size;
2506 prot = 0;
2507 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2508 p = page_find(addr >> TARGET_PAGE_BITS);
2509 p->flags |= PAGE_WRITE;
2510 prot |= p->flags;
2512 /* and since the content will be modified, we must invalidate
2513 the corresponding translated code. */
2514 tb_invalidate_phys_page(addr, pc, puc);
2515 #ifdef DEBUG_TB_CHECK
2516 tb_invalidate_check(addr);
2517 #endif
2519 mprotect((void *)g2h(host_start), qemu_host_page_size,
2520 prot & PAGE_BITS);
2522 mmap_unlock();
2523 return 1;
2525 mmap_unlock();
2526 return 0;
2529 static inline void tlb_set_dirty(CPUState *env,
2530 unsigned long addr, target_ulong vaddr)
2533 #endif /* defined(CONFIG_USER_ONLY) */
2535 #if !defined(CONFIG_USER_ONLY)
/* Byte offset of ADDR inside its target page. */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* Per-page dispatch table used when one target page is shared by
   several I/O regions; both arrays are indexed by SUBPAGE_IDX(). */
typedef struct subpage_t {
    /* Base address given to subpage_init(). */
    target_phys_addr_t base;
    /* io_mem_* table index that handles each byte offset. */
    ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
    /* Offset added to the access address for each byte offset. */
    ram_addr_t region_offset[TARGET_PAGE_SIZE];
} subpage_t;

/* Forward declarations: both are defined further down in this file. */
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             ram_addr_t memory, ram_addr_t region_offset);
static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
                                ram_addr_t orig_memory,
                                ram_addr_t region_offset);
/* Work out which part of the page at ADDR is covered by a registration
   that begins at START_ADDR, and whether that part is smaller than a
   whole page.
     - start_addr2 receives the first covered byte offset in the page,
     - end_addr2 receives the last covered byte offset in the page,
     - need_subpage is set to 1 when the coverage is partial (it is
       never cleared here, so the caller must initialise it to 0).
   NOTE: the macro also reads a variable named `orig_size` (the
   unrounded registration size) from the scope where it is expanded.
   The end_addr argument is accepted but not used.
   All arguments are parenthesised so expressions can be passed.  */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                     \
    do {                                                                \
        if ((addr) > (start_addr))                                      \
            (start_addr2) = 0;                                          \
        else {                                                          \
            (start_addr2) = (start_addr) & ~TARGET_PAGE_MASK;           \
            if ((start_addr2) > 0)                                      \
                (need_subpage) = 1;                                     \
        }                                                               \
                                                                        \
        if (((start_addr) + orig_size) - (addr) >= TARGET_PAGE_SIZE)    \
            (end_addr2) = TARGET_PAGE_SIZE - 1;                         \
        else {                                                          \
            (end_addr2) = ((start_addr) + orig_size - 1) &              \
                          ~TARGET_PAGE_MASK;                            \
            if ((end_addr2) < TARGET_PAGE_SIZE - 1)                     \
                (need_subpage) = 1;                                     \
        }                                                               \
    } while (0)
2569 /* register physical memory.
2570 For RAM, 'size' must be a multiple of the target page size.
2571 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2572 io memory page. The address used when calling the IO function is
2573 the offset from the start of the region, plus region_offset. Both
2574 start_addr and region_offset are rounded down to a page boundary
2575 before calculating this offset. This should not be a problem unless
2576 the low bits of start_addr and region_offset differ. */
2577 void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
2578 ram_addr_t size,
2579 ram_addr_t phys_offset,
2580 ram_addr_t region_offset)
2582 target_phys_addr_t addr, end_addr;
2583 PhysPageDesc *p;
2584 CPUState *env;
2585 ram_addr_t orig_size = size;
2586 subpage_t *subpage;
2588 cpu_notify_set_memory(start_addr, size, phys_offset);
2590 if (phys_offset == IO_MEM_UNASSIGNED) {
2591 region_offset = start_addr;
2593 region_offset &= TARGET_PAGE_MASK;
2594 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2595 end_addr = start_addr + (target_phys_addr_t)size;
2596 for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
2597 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2598 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2599 ram_addr_t orig_memory = p->phys_offset;
2600 target_phys_addr_t start_addr2, end_addr2;
2601 int need_subpage = 0;
2603 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2604 need_subpage);
2605 if (need_subpage) {
2606 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2607 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2608 &p->phys_offset, orig_memory,
2609 p->region_offset);
2610 } else {
2611 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2612 >> IO_MEM_SHIFT];
2614 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2615 region_offset);
2616 p->region_offset = 0;
2617 } else {
2618 p->phys_offset = phys_offset;
2619 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2620 (phys_offset & IO_MEM_ROMD))
2621 phys_offset += TARGET_PAGE_SIZE;
2623 } else {
2624 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2625 p->phys_offset = phys_offset;
2626 p->region_offset = region_offset;
2627 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2628 (phys_offset & IO_MEM_ROMD)) {
2629 phys_offset += TARGET_PAGE_SIZE;
2630 } else {
2631 target_phys_addr_t start_addr2, end_addr2;
2632 int need_subpage = 0;
2634 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2635 end_addr2, need_subpage);
2637 if (need_subpage) {
2638 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2639 &p->phys_offset, IO_MEM_UNASSIGNED,
2640 addr & TARGET_PAGE_MASK);
2641 subpage_register(subpage, start_addr2, end_addr2,
2642 phys_offset, region_offset);
2643 p->region_offset = 0;
2647 region_offset += TARGET_PAGE_SIZE;
2650 /* since each CPU stores ram addresses in its TLB cache, we must
2651 reset the modified entries */
2652 /* XXX: slow ! */
2653 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2654 tlb_flush(env, 1);
2658 /* XXX: temporary until new memory mapping API */
2659 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2661 PhysPageDesc *p;
2663 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2664 if (!p)
2665 return IO_MEM_UNASSIGNED;
2666 return p->phys_offset;
2669 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2671 if (kvm_enabled())
2672 kvm_coalesce_mmio_region(addr, size);
2675 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2677 if (kvm_enabled())
2678 kvm_uncoalesce_mmio_region(addr, size);
2681 void qemu_flush_coalesced_mmio_buffer(void)
2683 if (kvm_enabled())
2684 kvm_flush_coalesced_mmio_buffer();
2687 #if defined(__linux__) && !defined(TARGET_S390X)
2689 #include <sys/vfs.h>
2691 #define HUGETLBFS_MAGIC 0x958458f6
2693 static long gethugepagesize(const char *path)
2695 struct statfs fs;
2696 int ret;
2698 do {
2699 ret = statfs(path, &fs);
2700 } while (ret != 0 && errno == EINTR);
2702 if (ret != 0) {
2703 perror(path);
2704 return 0;
2707 if (fs.f_type != HUGETLBFS_MAGIC)
2708 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2710 return fs.f_bsize;
2713 static void *file_ram_alloc(ram_addr_t memory, const char *path)
2715 char *filename;
2716 void *area;
2717 int fd;
2718 #ifdef MAP_POPULATE
2719 int flags;
2720 #endif
2721 unsigned long hpagesize;
2723 hpagesize = gethugepagesize(path);
2724 if (!hpagesize) {
2725 return NULL;
2728 if (memory < hpagesize) {
2729 return NULL;
2732 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2733 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2734 return NULL;
2737 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2738 return NULL;
2741 fd = mkstemp(filename);
2742 if (fd < 0) {
2743 perror("unable to create backing store for hugepages");
2744 free(filename);
2745 return NULL;
2747 unlink(filename);
2748 free(filename);
2750 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2753 * ftruncate is not supported by hugetlbfs in older
2754 * hosts, so don't bother bailing out on errors.
2755 * If anything goes wrong with it under other filesystems,
2756 * mmap will fail.
2758 if (ftruncate(fd, memory))
2759 perror("ftruncate");
2761 #ifdef MAP_POPULATE
2762 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2763 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2764 * to sidestep this quirk.
2766 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2767 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2768 #else
2769 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2770 #endif
2771 if (area == MAP_FAILED) {
2772 perror("file_ram_alloc: can't mmap RAM pages");
2773 close(fd);
2774 return (NULL);
2776 return area;
2778 #endif
2780 ram_addr_t qemu_ram_alloc(ram_addr_t size)
2782 RAMBlock *new_block;
2784 size = TARGET_PAGE_ALIGN(size);
2785 new_block = qemu_malloc(sizeof(*new_block));
2787 if (mem_path) {
2788 #if defined (__linux__) && !defined(TARGET_S390X)
2789 new_block->host = file_ram_alloc(size, mem_path);
2790 if (!new_block->host) {
2791 new_block->host = qemu_vmalloc(size);
2792 #ifdef MADV_MERGEABLE
2793 madvise(new_block->host, size, MADV_MERGEABLE);
2794 #endif
2796 #else
2797 fprintf(stderr, "-mem-path option unsupported\n");
2798 exit(1);
2799 #endif
2800 } else {
2801 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2802 /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
2803 new_block->host = mmap((void*)0x1000000, size,
2804 PROT_EXEC|PROT_READ|PROT_WRITE,
2805 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
2806 #else
2807 new_block->host = qemu_vmalloc(size);
2808 #endif
2809 #ifdef MADV_MERGEABLE
2810 madvise(new_block->host, size, MADV_MERGEABLE);
2811 #endif
2813 new_block->offset = last_ram_offset;
2814 new_block->length = size;
2816 new_block->next = ram_blocks;
2817 ram_blocks = new_block;
2819 phys_ram_dirty = qemu_realloc(phys_ram_dirty,
2820 (last_ram_offset + size) >> TARGET_PAGE_BITS);
2821 memset(phys_ram_dirty + (last_ram_offset >> TARGET_PAGE_BITS),
2822 0xff, size >> TARGET_PAGE_BITS);
2824 last_ram_offset += size;
2826 if (kvm_enabled())
2827 kvm_setup_guest_memory(new_block->host, size);
2829 return new_block->offset;
2832 void qemu_ram_free(ram_addr_t addr)
2834 /* TODO: implement this. */
2837 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2838 With the exception of the softmmu code in this file, this should
2839 only be used for local memory (e.g. video ram) that the device owns,
2840 and knows it isn't going to access beyond the end of the block.
2842 It should not be used for general purpose DMA.
2843 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2845 void *qemu_get_ram_ptr(ram_addr_t addr)
2847 RAMBlock *prev;
2848 RAMBlock **prevp;
2849 RAMBlock *block;
2851 prev = NULL;
2852 prevp = &ram_blocks;
2853 block = ram_blocks;
2854 while (block && (block->offset > addr
2855 || block->offset + block->length <= addr)) {
2856 if (prev)
2857 prevp = &prev->next;
2858 prev = block;
2859 block = block->next;
2861 if (!block) {
2862 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2863 abort();
2865 /* Move this entry to to start of the list. */
2866 if (prev) {
2867 prev->next = block->next;
2868 block->next = *prevp;
2869 *prevp = block;
2871 return block->host + (addr - block->offset);
2874 /* Some of the softmmu routines need to translate from a host pointer
2875 (typically a TLB entry) back to a ram offset. */
2876 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2878 RAMBlock *block;
2879 uint8_t *host = ptr;
2881 block = ram_blocks;
2882 while (block && (block->host > host
2883 || block->host + block->length <= host)) {
2884 block = block->next;
2886 if (!block) {
2887 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2888 abort();
2890 return block->offset + (host - block->host);
2893 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
2895 #ifdef DEBUG_UNASSIGNED
2896 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2897 #endif
2898 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2899 do_unassigned_access(addr, 0, 0, 0, 1);
2900 #endif
2901 return 0;
2904 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
2906 #ifdef DEBUG_UNASSIGNED
2907 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2908 #endif
2909 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2910 do_unassigned_access(addr, 0, 0, 0, 2);
2911 #endif
2912 return 0;
2915 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
2917 #ifdef DEBUG_UNASSIGNED
2918 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2919 #endif
2920 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2921 do_unassigned_access(addr, 0, 0, 0, 4);
2922 #endif
2923 return 0;
2926 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
2928 #ifdef DEBUG_UNASSIGNED
2929 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2930 #endif
2931 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2932 do_unassigned_access(addr, 1, 0, 0, 1);
2933 #endif
2936 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
2938 #ifdef DEBUG_UNASSIGNED
2939 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2940 #endif
2941 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2942 do_unassigned_access(addr, 1, 0, 0, 2);
2943 #endif
2946 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
2948 #ifdef DEBUG_UNASSIGNED
2949 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2950 #endif
2951 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2952 do_unassigned_access(addr, 1, 0, 0, 4);
2953 #endif
2956 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
2957 unassigned_mem_readb,
2958 unassigned_mem_readw,
2959 unassigned_mem_readl,
2962 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
2963 unassigned_mem_writeb,
2964 unassigned_mem_writew,
2965 unassigned_mem_writel,
2968 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
2969 uint32_t val)
2971 int dirty_flags;
2972 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2973 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2974 #if !defined(CONFIG_USER_ONLY)
2975 tb_invalidate_phys_page_fast(ram_addr, 1);
2976 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2977 #endif
2979 stb_p(qemu_get_ram_ptr(ram_addr), val);
2980 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2981 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2982 /* we remove the notdirty callback only if the code has been
2983 flushed */
2984 if (dirty_flags == 0xff)
2985 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2988 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
2989 uint32_t val)
2991 int dirty_flags;
2992 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2993 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2994 #if !defined(CONFIG_USER_ONLY)
2995 tb_invalidate_phys_page_fast(ram_addr, 2);
2996 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2997 #endif
2999 stw_p(qemu_get_ram_ptr(ram_addr), val);
3000 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3001 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3002 /* we remove the notdirty callback only if the code has been
3003 flushed */
3004 if (dirty_flags == 0xff)
3005 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3008 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3009 uint32_t val)
3011 int dirty_flags;
3012 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3013 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3014 #if !defined(CONFIG_USER_ONLY)
3015 tb_invalidate_phys_page_fast(ram_addr, 4);
3016 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3017 #endif
3019 stl_p(qemu_get_ram_ptr(ram_addr), val);
3020 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3021 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3022 /* we remove the notdirty callback only if the code has been
3023 flushed */
3024 if (dirty_flags == 0xff)
3025 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3028 static CPUReadMemoryFunc * const error_mem_read[3] = {
3029 NULL, /* never used */
3030 NULL, /* never used */
3031 NULL, /* never used */
3034 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3035 notdirty_mem_writeb,
3036 notdirty_mem_writew,
3037 notdirty_mem_writel,
3040 /* Generate a debug exception if a watchpoint has been hit. */
3041 static void check_watchpoint(int offset, int len_mask, int flags)
3043 CPUState *env = cpu_single_env;
3044 target_ulong pc, cs_base;
3045 TranslationBlock *tb;
3046 target_ulong vaddr;
3047 CPUWatchpoint *wp;
3048 int cpu_flags;
3050 if (env->watchpoint_hit) {
3051 /* We re-entered the check after replacing the TB. Now raise
3052 * the debug interrupt so that is will trigger after the
3053 * current instruction. */
3054 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3055 return;
3057 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3058 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3059 if ((vaddr == (wp->vaddr & len_mask) ||
3060 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3061 wp->flags |= BP_WATCHPOINT_HIT;
3062 if (!env->watchpoint_hit) {
3063 env->watchpoint_hit = wp;
3064 tb = tb_find_pc(env->mem_io_pc);
3065 if (!tb) {
3066 cpu_abort(env, "check_watchpoint: could not find TB for "
3067 "pc=%p", (void *)env->mem_io_pc);
3069 cpu_restore_state(tb, env, env->mem_io_pc, NULL);
3070 tb_phys_invalidate(tb, -1);
3071 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3072 env->exception_index = EXCP_DEBUG;
3073 } else {
3074 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3075 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3077 cpu_resume_from_signal(env, NULL);
3079 } else {
3080 wp->flags &= ~BP_WATCHPOINT_HIT;
3085 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3086 so these check for a hit then pass through to the normal out-of-line
3087 phys routines. */
3088 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3090 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3091 return ldub_phys(addr);
3094 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3096 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3097 return lduw_phys(addr);
3100 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3102 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3103 return ldl_phys(addr);
3106 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3107 uint32_t val)
3109 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3110 stb_phys(addr, val);
3113 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3114 uint32_t val)
3116 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3117 stw_phys(addr, val);
3120 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3121 uint32_t val)
3123 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3124 stl_phys(addr, val);
3127 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3128 watch_mem_readb,
3129 watch_mem_readw,
3130 watch_mem_readl,
3133 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3134 watch_mem_writeb,
3135 watch_mem_writew,
3136 watch_mem_writel,
3139 static inline uint32_t subpage_readlen (subpage_t *mmio,
3140 target_phys_addr_t addr,
3141 unsigned int len)
3143 unsigned int idx = SUBPAGE_IDX(addr);
3144 #if defined(DEBUG_SUBPAGE)
3145 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3146 mmio, len, addr, idx);
3147 #endif
3149 addr += mmio->region_offset[idx];
3150 idx = mmio->sub_io_index[idx];
3151 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3154 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3155 uint32_t value, unsigned int len)
3157 unsigned int idx = SUBPAGE_IDX(addr);
3158 #if defined(DEBUG_SUBPAGE)
3159 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3160 __func__, mmio, len, addr, idx, value);
3161 #endif
3163 addr += mmio->region_offset[idx];
3164 idx = mmio->sub_io_index[idx];
3165 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3168 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3170 return subpage_readlen(opaque, addr, 0);
3173 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3174 uint32_t value)
3176 subpage_writelen(opaque, addr, value, 0);
3179 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3181 return subpage_readlen(opaque, addr, 1);
3184 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3185 uint32_t value)
3187 subpage_writelen(opaque, addr, value, 1);
3190 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3192 return subpage_readlen(opaque, addr, 2);
3195 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3196 uint32_t value)
3198 subpage_writelen(opaque, addr, value, 2);
3201 static CPUReadMemoryFunc * const subpage_read[] = {
3202 &subpage_readb,
3203 &subpage_readw,
3204 &subpage_readl,
3207 static CPUWriteMemoryFunc * const subpage_write[] = {
3208 &subpage_writeb,
3209 &subpage_writew,
3210 &subpage_writel,
3213 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3214 ram_addr_t memory, ram_addr_t region_offset)
3216 int idx, eidx;
3218 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3219 return -1;
3220 idx = SUBPAGE_IDX(start);
3221 eidx = SUBPAGE_IDX(end);
3222 #if defined(DEBUG_SUBPAGE)
3223 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3224 mmio, start, end, idx, eidx, memory);
3225 #endif
3226 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3227 for (; idx <= eidx; idx++) {
3228 mmio->sub_io_index[idx] = memory;
3229 mmio->region_offset[idx] = region_offset;
3232 return 0;
3235 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3236 ram_addr_t orig_memory,
3237 ram_addr_t region_offset)
3239 subpage_t *mmio;
3240 int subpage_memory;
3242 mmio = qemu_mallocz(sizeof(subpage_t));
3244 mmio->base = base;
3245 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio);
3246 #if defined(DEBUG_SUBPAGE)
3247 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3248 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3249 #endif
3250 *phys = subpage_memory | IO_MEM_SUBPAGE;
3251 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3253 return mmio;
3256 static int get_free_io_mem_idx(void)
3258 int i;
3260 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3261 if (!io_mem_used[i]) {
3262 io_mem_used[i] = 1;
3263 return i;
3265 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3266 return -1;
3269 /* mem_read and mem_write are arrays of functions containing the
3270 function to access byte (index 0), word (index 1) and dword (index
3271 2). Functions can be omitted with a NULL function pointer.
3272 If io_index is non zero, the corresponding io zone is
3273 modified. If it is zero, a new io zone is allocated. The return
3274 value can be used with cpu_register_physical_memory(). (-1) is
3275 returned if error. */
3276 static int cpu_register_io_memory_fixed(int io_index,
3277 CPUReadMemoryFunc * const *mem_read,
3278 CPUWriteMemoryFunc * const *mem_write,
3279 void *opaque)
3281 int i;
3283 if (io_index <= 0) {
3284 io_index = get_free_io_mem_idx();
3285 if (io_index == -1)
3286 return io_index;
3287 } else {
3288 io_index >>= IO_MEM_SHIFT;
3289 if (io_index >= IO_MEM_NB_ENTRIES)
3290 return -1;
3293 for (i = 0; i < 3; ++i) {
3294 io_mem_read[io_index][i]
3295 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3297 for (i = 0; i < 3; ++i) {
3298 io_mem_write[io_index][i]
3299 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3301 io_mem_opaque[io_index] = opaque;
3303 return (io_index << IO_MEM_SHIFT);
3306 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3307 CPUWriteMemoryFunc * const *mem_write,
3308 void *opaque)
3310 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque);
3313 void cpu_unregister_io_memory(int io_table_address)
3315 int i;
3316 int io_index = io_table_address >> IO_MEM_SHIFT;
3318 for (i=0;i < 3; i++) {
3319 io_mem_read[io_index][i] = unassigned_mem_read[i];
3320 io_mem_write[io_index][i] = unassigned_mem_write[i];
3322 io_mem_opaque[io_index] = NULL;
3323 io_mem_used[io_index] = 0;
3326 static void io_mem_init(void)
3328 int i;
3330 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read, unassigned_mem_write, NULL);
3331 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read, unassigned_mem_write, NULL);
3332 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, notdirty_mem_write, NULL);
3333 for (i=0; i<5; i++)
3334 io_mem_used[i] = 1;
3336 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3337 watch_mem_write, NULL);
3340 #endif /* !defined(CONFIG_USER_ONLY) */
3342 /* physical memory access (slow version, mainly for debug) */
3343 #if defined(CONFIG_USER_ONLY)
3344 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3345 uint8_t *buf, int len, int is_write)
3347 int l, flags;
3348 target_ulong page;
3349 void * p;
3351 while (len > 0) {
3352 page = addr & TARGET_PAGE_MASK;
3353 l = (page + TARGET_PAGE_SIZE) - addr;
3354 if (l > len)
3355 l = len;
3356 flags = page_get_flags(page);
3357 if (!(flags & PAGE_VALID))
3358 return -1;
3359 if (is_write) {
3360 if (!(flags & PAGE_WRITE))
3361 return -1;
3362 /* XXX: this code should not depend on lock_user */
3363 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3364 return -1;
3365 memcpy(p, buf, l);
3366 unlock_user(p, addr, l);
3367 } else {
3368 if (!(flags & PAGE_READ))
3369 return -1;
3370 /* XXX: this code should not depend on lock_user */
3371 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3372 return -1;
3373 memcpy(buf, p, l);
3374 unlock_user(p, addr, 0);
3376 len -= l;
3377 buf += l;
3378 addr += l;
3380 return 0;
3383 #else
3384 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3385 int len, int is_write)
3387 int l, io_index;
3388 uint8_t *ptr;
3389 uint32_t val;
3390 target_phys_addr_t page;
3391 unsigned long pd;
3392 PhysPageDesc *p;
3394 while (len > 0) {
3395 page = addr & TARGET_PAGE_MASK;
3396 l = (page + TARGET_PAGE_SIZE) - addr;
3397 if (l > len)
3398 l = len;
3399 p = phys_page_find(page >> TARGET_PAGE_BITS);
3400 if (!p) {
3401 pd = IO_MEM_UNASSIGNED;
3402 } else {
3403 pd = p->phys_offset;
3406 if (is_write) {
3407 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3408 target_phys_addr_t addr1 = addr;
3409 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3410 if (p)
3411 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3412 /* XXX: could force cpu_single_env to NULL to avoid
3413 potential bugs */
3414 if (l >= 4 && ((addr1 & 3) == 0)) {
3415 /* 32 bit write access */
3416 val = ldl_p(buf);
3417 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3418 l = 4;
3419 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3420 /* 16 bit write access */
3421 val = lduw_p(buf);
3422 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3423 l = 2;
3424 } else {
3425 /* 8 bit write access */
3426 val = ldub_p(buf);
3427 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3428 l = 1;
3430 } else {
3431 unsigned long addr1;
3432 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3433 /* RAM case */
3434 ptr = qemu_get_ram_ptr(addr1);
3435 memcpy(ptr, buf, l);
3436 if (!cpu_physical_memory_is_dirty(addr1)) {
3437 /* invalidate code */
3438 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3439 /* set dirty bit */
3440 cpu_physical_memory_set_dirty_flags(
3441 addr1, (0xff & ~CODE_DIRTY_FLAG));
3444 } else {
3445 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3446 !(pd & IO_MEM_ROMD)) {
3447 target_phys_addr_t addr1 = addr;
3448 /* I/O case */
3449 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3450 if (p)
3451 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3452 if (l >= 4 && ((addr1 & 3) == 0)) {
3453 /* 32 bit read access */
3454 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3455 stl_p(buf, val);
3456 l = 4;
3457 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3458 /* 16 bit read access */
3459 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3460 stw_p(buf, val);
3461 l = 2;
3462 } else {
3463 /* 8 bit read access */
3464 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3465 stb_p(buf, val);
3466 l = 1;
3468 } else {
3469 /* RAM case */
3470 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3471 (addr & ~TARGET_PAGE_MASK);
3472 memcpy(buf, ptr, l);
3475 len -= l;
3476 buf += l;
3477 addr += l;
3481 /* used for ROM loading : can write in RAM and ROM */
3482 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3483 const uint8_t *buf, int len)
3485 int l;
3486 uint8_t *ptr;
3487 target_phys_addr_t page;
3488 unsigned long pd;
3489 PhysPageDesc *p;
3491 while (len > 0) {
3492 page = addr & TARGET_PAGE_MASK;
3493 l = (page + TARGET_PAGE_SIZE) - addr;
3494 if (l > len)
3495 l = len;
3496 p = phys_page_find(page >> TARGET_PAGE_BITS);
3497 if (!p) {
3498 pd = IO_MEM_UNASSIGNED;
3499 } else {
3500 pd = p->phys_offset;
3503 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3504 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3505 !(pd & IO_MEM_ROMD)) {
3506 /* do nothing */
3507 } else {
3508 unsigned long addr1;
3509 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3510 /* ROM/RAM case */
3511 ptr = qemu_get_ram_ptr(addr1);
3512 memcpy(ptr, buf, l);
3514 len -= l;
3515 buf += l;
3516 addr += l;
3520 typedef struct {
3521 void *buffer;
3522 target_phys_addr_t addr;
3523 target_phys_addr_t len;
3524 } BounceBuffer;
3526 static BounceBuffer bounce;
3528 typedef struct MapClient {
3529 void *opaque;
3530 void (*callback)(void *opaque);
3531 QLIST_ENTRY(MapClient) link;
3532 } MapClient;
3534 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3535 = QLIST_HEAD_INITIALIZER(map_client_list);
3537 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3539 MapClient *client = qemu_malloc(sizeof(*client));
3541 client->opaque = opaque;
3542 client->callback = callback;
3543 QLIST_INSERT_HEAD(&map_client_list, client, link);
3544 return client;
3547 void cpu_unregister_map_client(void *_client)
3549 MapClient *client = (MapClient *)_client;
3551 QLIST_REMOVE(client, link);
3552 qemu_free(client);
3555 static void cpu_notify_map_clients(void)
3557 MapClient *client;
3559 while (!QLIST_EMPTY(&map_client_list)) {
3560 client = QLIST_FIRST(&map_client_list);
3561 client->callback(client->opaque);
3562 cpu_unregister_map_client(client);
3566 /* Map a physical memory region into a host virtual address.
3567 * May map a subset of the requested range, given by and returned in *plen.
3568 * May return NULL if resources needed to perform the mapping are exhausted.
3569 * Use only for reads OR writes - not for read-modify-write operations.
3570 * Use cpu_register_map_client() to know when retrying the map operation is
3571 * likely to succeed.
3573 void *cpu_physical_memory_map(target_phys_addr_t addr,
3574 target_phys_addr_t *plen,
3575 int is_write)
3577 target_phys_addr_t len = *plen;
3578 target_phys_addr_t done = 0;
3579 int l;
3580 uint8_t *ret = NULL;
3581 uint8_t *ptr;
3582 target_phys_addr_t page;
3583 unsigned long pd;
3584 PhysPageDesc *p;
3585 unsigned long addr1;
3587 while (len > 0) {
3588 page = addr & TARGET_PAGE_MASK;
3589 l = (page + TARGET_PAGE_SIZE) - addr;
3590 if (l > len)
3591 l = len;
3592 p = phys_page_find(page >> TARGET_PAGE_BITS);
3593 if (!p) {
3594 pd = IO_MEM_UNASSIGNED;
3595 } else {
3596 pd = p->phys_offset;
3599 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3600 if (done || bounce.buffer) {
3601 break;
3603 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3604 bounce.addr = addr;
3605 bounce.len = l;
3606 if (!is_write) {
3607 cpu_physical_memory_rw(addr, bounce.buffer, l, 0);
3609 ptr = bounce.buffer;
3610 } else {
3611 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3612 ptr = qemu_get_ram_ptr(addr1);
3614 if (!done) {
3615 ret = ptr;
3616 } else if (ret + done != ptr) {
3617 break;
3620 len -= l;
3621 addr += l;
3622 done += l;
3624 *plen = done;
3625 return ret;
3628 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3629 * Will also mark the memory as dirty if is_write == 1. access_len gives
3630 * the amount of memory that was actually read or written by the caller.
3632 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3633 int is_write, target_phys_addr_t access_len)
3635 if (buffer != bounce.buffer) {
3636 if (is_write) {
3637 ram_addr_t addr1 = qemu_ram_addr_from_host(buffer);
3638 while (access_len) {
3639 unsigned l;
3640 l = TARGET_PAGE_SIZE;
3641 if (l > access_len)
3642 l = access_len;
3643 if (!cpu_physical_memory_is_dirty(addr1)) {
3644 /* invalidate code */
3645 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3646 /* set dirty bit */
3647 cpu_physical_memory_set_dirty_flags(
3648 addr1, (0xff & ~CODE_DIRTY_FLAG));
3650 addr1 += l;
3651 access_len -= l;
3654 return;
3656 if (is_write) {
3657 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3659 qemu_vfree(bounce.buffer);
3660 bounce.buffer = NULL;
3661 cpu_notify_map_clients();
3664 /* warning: addr must be aligned */
3665 uint32_t ldl_phys(target_phys_addr_t addr)
3667 int io_index;
3668 uint8_t *ptr;
3669 uint32_t val;
3670 unsigned long pd;
3671 PhysPageDesc *p;
3673 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3674 if (!p) {
3675 pd = IO_MEM_UNASSIGNED;
3676 } else {
3677 pd = p->phys_offset;
3680 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3681 !(pd & IO_MEM_ROMD)) {
3682 /* I/O case */
3683 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3684 if (p)
3685 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3686 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3687 } else {
3688 /* RAM case */
3689 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3690 (addr & ~TARGET_PAGE_MASK);
3691 val = ldl_p(ptr);
3693 return val;
3696 /* warning: addr must be aligned */
3697 uint64_t ldq_phys(target_phys_addr_t addr)
3699 int io_index;
3700 uint8_t *ptr;
3701 uint64_t val;
3702 unsigned long pd;
3703 PhysPageDesc *p;
3705 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3706 if (!p) {
3707 pd = IO_MEM_UNASSIGNED;
3708 } else {
3709 pd = p->phys_offset;
3712 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3713 !(pd & IO_MEM_ROMD)) {
3714 /* I/O case */
3715 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3716 if (p)
3717 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3718 #ifdef TARGET_WORDS_BIGENDIAN
3719 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
3720 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
3721 #else
3722 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3723 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
3724 #endif
3725 } else {
3726 /* RAM case */
3727 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3728 (addr & ~TARGET_PAGE_MASK);
3729 val = ldq_p(ptr);
3731 return val;
3734 /* XXX: optimize */
3735 uint32_t ldub_phys(target_phys_addr_t addr)
3737 uint8_t val;
3738 cpu_physical_memory_read(addr, &val, 1);
3739 return val;
3742 /* warning: addr must be aligned */
3743 uint32_t lduw_phys(target_phys_addr_t addr)
3745 int io_index;
3746 uint8_t *ptr;
3747 uint64_t val;
3748 unsigned long pd;
3749 PhysPageDesc *p;
3751 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3752 if (!p) {
3753 pd = IO_MEM_UNASSIGNED;
3754 } else {
3755 pd = p->phys_offset;
3758 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3759 !(pd & IO_MEM_ROMD)) {
3760 /* I/O case */
3761 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3762 if (p)
3763 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3764 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
3765 } else {
3766 /* RAM case */
3767 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3768 (addr & ~TARGET_PAGE_MASK);
3769 val = lduw_p(ptr);
3771 return val;
3774 /* warning: addr must be aligned. The ram page is not masked as dirty
3775 and the code inside is not invalidated. It is useful if the dirty
3776 bits are used to track modified PTEs */
3777 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3779 int io_index;
3780 uint8_t *ptr;
3781 unsigned long pd;
3782 PhysPageDesc *p;
3784 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3785 if (!p) {
3786 pd = IO_MEM_UNASSIGNED;
3787 } else {
3788 pd = p->phys_offset;
3791 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3792 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3793 if (p)
3794 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3795 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3796 } else {
3797 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3798 ptr = qemu_get_ram_ptr(addr1);
3799 stl_p(ptr, val);
3801 if (unlikely(in_migration)) {
3802 if (!cpu_physical_memory_is_dirty(addr1)) {
3803 /* invalidate code */
3804 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3805 /* set dirty bit */
3806 cpu_physical_memory_set_dirty_flags(
3807 addr1, (0xff & ~CODE_DIRTY_FLAG));
3813 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3815 int io_index;
3816 uint8_t *ptr;
3817 unsigned long pd;
3818 PhysPageDesc *p;
3820 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3821 if (!p) {
3822 pd = IO_MEM_UNASSIGNED;
3823 } else {
3824 pd = p->phys_offset;
3827 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3828 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3829 if (p)
3830 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3831 #ifdef TARGET_WORDS_BIGENDIAN
3832 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
3833 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
3834 #else
3835 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3836 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
3837 #endif
3838 } else {
3839 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3840 (addr & ~TARGET_PAGE_MASK);
3841 stq_p(ptr, val);
3845 /* warning: addr must be aligned */
3846 void stl_phys(target_phys_addr_t addr, uint32_t val)
3848 int io_index;
3849 uint8_t *ptr;
3850 unsigned long pd;
3851 PhysPageDesc *p;
3853 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3854 if (!p) {
3855 pd = IO_MEM_UNASSIGNED;
3856 } else {
3857 pd = p->phys_offset;
3860 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3861 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3862 if (p)
3863 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3864 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3865 } else {
3866 unsigned long addr1;
3867 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3868 /* RAM case */
3869 ptr = qemu_get_ram_ptr(addr1);
3870 stl_p(ptr, val);
3871 if (!cpu_physical_memory_is_dirty(addr1)) {
3872 /* invalidate code */
3873 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3874 /* set dirty bit */
3875 cpu_physical_memory_set_dirty_flags(addr1,
3876 (0xff & ~CODE_DIRTY_FLAG));
3881 /* XXX: optimize */
3882 void stb_phys(target_phys_addr_t addr, uint32_t val)
3884 uint8_t v = val;
3885 cpu_physical_memory_write(addr, &v, 1);
3888 /* warning: addr must be aligned */
3889 void stw_phys(target_phys_addr_t addr, uint32_t val)
3891 int io_index;
3892 uint8_t *ptr;
3893 unsigned long pd;
3894 PhysPageDesc *p;
3896 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3897 if (!p) {
3898 pd = IO_MEM_UNASSIGNED;
3899 } else {
3900 pd = p->phys_offset;
3903 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3904 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3905 if (p)
3906 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3907 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
3908 } else {
3909 unsigned long addr1;
3910 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3911 /* RAM case */
3912 ptr = qemu_get_ram_ptr(addr1);
3913 stw_p(ptr, val);
3914 if (!cpu_physical_memory_is_dirty(addr1)) {
3915 /* invalidate code */
3916 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
3917 /* set dirty bit */
3918 cpu_physical_memory_set_dirty_flags(addr1,
3919 (0xff & ~CODE_DIRTY_FLAG));
3924 /* XXX: optimize */
3925 void stq_phys(target_phys_addr_t addr, uint64_t val)
3927 val = tswap64(val);
3928 cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
3931 /* virtual memory access for debug (includes writing to ROM) */
3932 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3933 uint8_t *buf, int len, int is_write)
3935 int l;
3936 target_phys_addr_t phys_addr;
3937 target_ulong page;
3939 while (len > 0) {
3940 page = addr & TARGET_PAGE_MASK;
3941 phys_addr = cpu_get_phys_page_debug(env, page);
3942 /* if no physical page mapped, return an error */
3943 if (phys_addr == -1)
3944 return -1;
3945 l = (page + TARGET_PAGE_SIZE) - addr;
3946 if (l > len)
3947 l = len;
3948 phys_addr += (addr & ~TARGET_PAGE_MASK);
3949 if (is_write)
3950 cpu_physical_memory_write_rom(phys_addr, buf, l);
3951 else
3952 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
3953 len -= l;
3954 buf += l;
3955 addr += l;
3957 return 0;
3959 #endif
3961 /* in deterministic execution mode, instructions doing device I/Os
3962 must be at the end of the TB */
3963 void cpu_io_recompile(CPUState *env, void *retaddr)
3965 TranslationBlock *tb;
3966 uint32_t n, cflags;
3967 target_ulong pc, cs_base;
3968 uint64_t flags;
3970 tb = tb_find_pc((unsigned long)retaddr);
3971 if (!tb) {
3972 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
3973 retaddr);
3975 n = env->icount_decr.u16.low + tb->icount;
3976 cpu_restore_state(tb, env, (unsigned long)retaddr, NULL);
3977 /* Calculate how many instructions had been executed before the fault
3978 occurred. */
3979 n = n - env->icount_decr.u16.low;
3980 /* Generate a new TB ending on the I/O insn. */
3981 n++;
3982 /* On MIPS and SH, delay slot instructions can only be restarted if
3983 they were already the first instruction in the TB. If this is not
3984 the first instruction in a TB then re-execute the preceding
3985 branch. */
3986 #if defined(TARGET_MIPS)
3987 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
3988 env->active_tc.PC -= 4;
3989 env->icount_decr.u16.low++;
3990 env->hflags &= ~MIPS_HFLAG_BMASK;
3992 #elif defined(TARGET_SH4)
3993 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
3994 && n > 1) {
3995 env->pc -= 2;
3996 env->icount_decr.u16.low++;
3997 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
3999 #endif
4000 /* This should never happen. */
4001 if (n > CF_COUNT_MASK)
4002 cpu_abort(env, "TB too big during recompile");
4004 cflags = n | CF_LAST_IO;
4005 pc = tb->pc;
4006 cs_base = tb->cs_base;
4007 flags = tb->flags;
4008 tb_phys_invalidate(tb, -1);
4009 /* FIXME: In theory this could raise an exception. In practice
4010 we have already translated the block once so it's probably ok. */
4011 tb_gen_code(env, pc, cs_base, flags, cflags);
4012 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4013 the first in the TB) then we end up generating a whole new TB and
4014 repeating the fault, which is horribly inefficient.
4015 Better would be to execute just this insn uncached, or generate a
4016 second new TB. */
4017 cpu_resume_from_signal(env, NULL);
4020 #if !defined(CONFIG_USER_ONLY)
4022 void dump_exec_info(FILE *f,
4023 int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
4025 int i, target_code_size, max_target_code_size;
4026 int direct_jmp_count, direct_jmp2_count, cross_page;
4027 TranslationBlock *tb;
4029 target_code_size = 0;
4030 max_target_code_size = 0;
4031 cross_page = 0;
4032 direct_jmp_count = 0;
4033 direct_jmp2_count = 0;
4034 for(i = 0; i < nb_tbs; i++) {
4035 tb = &tbs[i];
4036 target_code_size += tb->size;
4037 if (tb->size > max_target_code_size)
4038 max_target_code_size = tb->size;
4039 if (tb->page_addr[1] != -1)
4040 cross_page++;
4041 if (tb->tb_next_offset[0] != 0xffff) {
4042 direct_jmp_count++;
4043 if (tb->tb_next_offset[1] != 0xffff) {
4044 direct_jmp2_count++;
4048 /* XXX: avoid using doubles ? */
4049 cpu_fprintf(f, "Translation buffer state:\n");
4050 cpu_fprintf(f, "gen code size %ld/%ld\n",
4051 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4052 cpu_fprintf(f, "TB count %d/%d\n",
4053 nb_tbs, code_gen_max_blocks);
4054 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4055 nb_tbs ? target_code_size / nb_tbs : 0,
4056 max_target_code_size);
4057 cpu_fprintf(f, "TB avg host size %d bytes (expansion ratio: %0.1f)\n",
4058 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4059 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4060 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4061 cross_page,
4062 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4063 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4064 direct_jmp_count,
4065 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4066 direct_jmp2_count,
4067 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4068 cpu_fprintf(f, "\nStatistics:\n");
4069 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4070 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4071 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4072 tcg_dump_info(f, cpu_fprintf);
4075 #define MMUSUFFIX _cmmu
4076 #define GETPC() NULL
4077 #define env cpu_single_env
4078 #define SOFTMMU_CODE_ACCESS
4080 #define SHIFT 0
4081 #include "softmmu_template.h"
4083 #define SHIFT 1
4084 #include "softmmu_template.h"
4086 #define SHIFT 2
4087 #include "softmmu_template.h"
4089 #define SHIFT 3
4090 #include "softmmu_template.h"
4092 #undef env
4094 #endif