Revert "Avoid page_set_flags() assert in qemu-user host page protection code"
[qemu/aliguori-queue.git] / exec.c
blob76163aa87f43034d771178be909a1fb5b48a5f0d
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <unistd.h>
32 #include <inttypes.h>
34 #include "cpu.h"
35 #include "exec-all.h"
36 #include "qemu-common.h"
37 #include "tcg.h"
38 #include "hw/hw.h"
39 #include "osdep.h"
40 #include "kvm.h"
41 #include "qemu-timer.h"
42 #if defined(CONFIG_USER_ONLY)
43 #include <qemu.h>
44 #include <signal.h>
45 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
46 #include <sys/param.h>
47 #if __FreeBSD_version >= 700104
48 #define HAVE_KINFO_GETVMMAP
49 #define sigqueue sigqueue_freebsd /* avoid redefinition */
50 #include <sys/time.h>
51 #include <sys/proc.h>
52 #include <machine/profile.h>
53 #define _KERNEL
54 #include <sys/user.h>
55 #undef _KERNEL
56 #undef sigqueue
57 #include <libutil.h>
58 #endif
59 #endif
60 #endif
62 //#define DEBUG_TB_INVALIDATE
63 //#define DEBUG_FLUSH
64 //#define DEBUG_TLB
65 //#define DEBUG_UNASSIGNED
67 /* make various TB consistency checks */
68 //#define DEBUG_TB_CHECK
69 //#define DEBUG_TLB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 #if defined(__arm__) || defined(__sparc_v9__)
89 /* The prologue must be reachable with a direct jump. ARM and Sparc64
90 have limited branch ranges (possibly also PPC) so place it in a
91 section close to code segment. */
92 #define code_gen_section \
93 __attribute__((__section__(".gen_code"))) \
94 __attribute__((aligned (32)))
95 #elif defined(_WIN32)
96 /* Maximum alignment for Win32 is 16. */
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 uint8_t *code_gen_ptr;
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 uint8_t *phys_ram_dirty;
114 static int in_migration;
116 typedef struct RAMBlock {
117 uint8_t *host;
118 ram_addr_t offset;
119 ram_addr_t length;
120 struct RAMBlock *next;
121 } RAMBlock;
123 static RAMBlock *ram_blocks;
124 /* TODO: When we implement (and use) ram deallocation (e.g. for hotplug)
125 then we can no longer assume contiguous ram offsets, and external uses
126 of this variable will break. */
127 ram_addr_t last_ram_offset;
128 #endif
130 CPUState *first_cpu;
131 /* current CPU in the current thread. It is only valid inside
132 cpu_exec() */
133 CPUState *cpu_single_env;
134 /* 0 = Do not count executed instructions.
135 1 = Precise instruction counting.
136 2 = Adaptive rate instruction counting. */
137 int use_icount = 0;
138 /* Current instruction counter. While executing translated code this may
139 include some instructions that have not yet been executed. */
140 int64_t qemu_icount;
142 typedef struct PageDesc {
143 /* list of TBs intersecting this ram page */
144 TranslationBlock *first_tb;
145 /* in order to optimize self modifying code, we count the number
146 of lookups we do to a given page to use a bitmap */
147 unsigned int code_write_count;
148 uint8_t *code_bitmap;
149 #if defined(CONFIG_USER_ONLY)
150 unsigned long flags;
151 #endif
152 } PageDesc;
154 /* In system mode we want L1_MAP to be based on ram offsets,
155 while in user mode we want it to be based on virtual addresses. */
156 #if !defined(CONFIG_USER_ONLY)
157 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
158 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
159 #else
160 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
161 #endif
162 #else
163 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
164 #endif
166 /* Size of the L2 (and L3, etc) page tables. */
167 #define L2_BITS 10
168 #define L2_SIZE (1 << L2_BITS)
170 /* The bits remaining after N lower levels of page tables. */
171 #define P_L1_BITS_REM \
172 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
173 #define V_L1_BITS_REM \
174 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
176 /* Size of the L1 page table. Avoid silly small sizes. */
177 #if P_L1_BITS_REM < 4
178 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
179 #else
180 #define P_L1_BITS P_L1_BITS_REM
181 #endif
183 #if V_L1_BITS_REM < 4
184 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
185 #else
186 #define V_L1_BITS V_L1_BITS_REM
187 #endif
189 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
190 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
192 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
193 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
195 unsigned long qemu_real_host_page_size;
196 unsigned long qemu_host_page_bits;
197 unsigned long qemu_host_page_size;
198 unsigned long qemu_host_page_mask;
200 /* This is a multi-level map on the virtual address space.
201 The bottom level has pointers to PageDesc. */
202 static void *l1_map[V_L1_SIZE];
204 #if !defined(CONFIG_USER_ONLY)
205 typedef struct PhysPageDesc {
206 /* offset in host memory of the page + io_index in the low bits */
207 ram_addr_t phys_offset;
208 ram_addr_t region_offset;
209 } PhysPageDesc;
211 /* This is a multi-level map on the physical address space.
212 The bottom level has pointers to PhysPageDesc. */
213 static void *l1_phys_map[P_L1_SIZE];
215 static void io_mem_init(void);
217 /* io memory support */
218 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
219 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
220 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
221 static char io_mem_used[IO_MEM_NB_ENTRIES];
222 static int io_mem_watch;
223 #endif
225 /* log support */
226 #ifdef WIN32
227 static const char *logfilename = "qemu.log";
228 #else
229 static const char *logfilename = "/tmp/qemu.log";
230 #endif
231 FILE *logfile;
232 int loglevel;
233 static int log_append = 0;
235 /* statistics */
236 #if !defined(CONFIG_USER_ONLY)
237 static int tlb_flush_count;
238 #endif
239 static int tb_flush_count;
240 static int tb_phys_invalidate_count;
#ifdef _WIN32
/*
 * Ask the host to make the region [addr, addr + size) readable, writable
 * and executable.  The result of the protection call is not checked.
 */
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous_protection);
}
#else
/*
 * Ask the host to make the region [addr, addr + size) readable, writable
 * and executable.  The region is widened to whole host pages first, since
 * mprotect() works on page granularity.  The result of mprotect() is not
 * checked.
 */
static void map_exec(void *addr, long size)
{
    unsigned long page_size, first, last;

    page_size = getpagesize();

    /* round the start down and the end up to a page boundary */
    first = (unsigned long)addr & ~(page_size - 1);
    last = ((unsigned long)addr + size + page_size - 1) & ~(page_size - 1);

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
268 static void page_init(void)
270 /* NOTE: we can always suppose that qemu_host_page_size >=
271 TARGET_PAGE_SIZE */
272 #ifdef _WIN32
274 SYSTEM_INFO system_info;
276 GetSystemInfo(&system_info);
277 qemu_real_host_page_size = system_info.dwPageSize;
279 #else
280 qemu_real_host_page_size = getpagesize();
281 #endif
282 if (qemu_host_page_size == 0)
283 qemu_host_page_size = qemu_real_host_page_size;
284 if (qemu_host_page_size < TARGET_PAGE_SIZE)
285 qemu_host_page_size = TARGET_PAGE_SIZE;
286 qemu_host_page_bits = 0;
287 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
288 qemu_host_page_bits++;
289 qemu_host_page_mask = ~(qemu_host_page_size - 1);
291 #if !defined(_WIN32) && defined(CONFIG_USER_ONLY)
293 #ifdef HAVE_KINFO_GETVMMAP
294 struct kinfo_vmentry *freep;
295 int i, cnt;
297 freep = kinfo_getvmmap(getpid(), &cnt);
298 if (freep) {
299 mmap_lock();
300 for (i = 0; i < cnt; i++) {
301 unsigned long startaddr, endaddr;
303 startaddr = freep[i].kve_start;
304 endaddr = freep[i].kve_end;
305 if (h2g_valid(startaddr)) {
306 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
308 if (h2g_valid(endaddr)) {
309 endaddr = h2g(endaddr);
310 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
311 } else {
312 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
313 endaddr = ~0ul;
314 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
315 #endif
319 free(freep);
320 mmap_unlock();
322 #else
323 FILE *f;
325 last_brk = (unsigned long)sbrk(0);
327 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
328 f = fopen("/compat/linux/proc/self/maps", "r");
329 #else
330 f = fopen("/proc/self/maps", "r");
331 #endif
332 if (f) {
333 mmap_lock();
335 do {
336 unsigned long startaddr, endaddr;
337 int n;
339 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
341 if (n == 2 && h2g_valid(startaddr)) {
342 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
344 if (h2g_valid(endaddr)) {
345 endaddr = h2g(endaddr);
346 } else {
347 endaddr = ~0ul;
349 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
351 } while (!feof(f));
353 fclose(f);
354 mmap_unlock();
356 #endif
358 #endif
361 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
363 PageDesc *pd;
364 void **lp;
365 int i;
367 #if defined(CONFIG_USER_ONLY)
368 /* We can't use qemu_malloc because it may recurse into a locked mutex.
369 Neither can we record the new pages we reserve while allocating a
370 given page because that may recurse into an unallocated page table
371 entry. Stuff the allocations we do make into a queue and process
372 them after having completed one entire page table allocation. */
374 unsigned long reserve[2 * (V_L1_SHIFT / L2_BITS)];
375 int reserve_idx = 0;
377 # define ALLOC(P, SIZE) \
378 do { \
379 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
380 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
381 if (h2g_valid(P)) { \
382 reserve[reserve_idx] = h2g(P); \
383 reserve[reserve_idx + 1] = SIZE; \
384 reserve_idx += 2; \
386 } while (0)
387 #else
388 # define ALLOC(P, SIZE) \
389 do { P = qemu_mallocz(SIZE); } while (0)
390 #endif
392 /* Level 1. Always allocated. */
393 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
395 /* Level 2..N-1. */
396 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
397 void **p = *lp;
399 if (p == NULL) {
400 if (!alloc) {
401 return NULL;
403 ALLOC(p, sizeof(void *) * L2_SIZE);
404 *lp = p;
407 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
410 pd = *lp;
411 if (pd == NULL) {
412 if (!alloc) {
413 return NULL;
415 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
416 *lp = pd;
419 #undef ALLOC
420 #if defined(CONFIG_USER_ONLY)
421 for (i = 0; i < reserve_idx; i += 2) {
422 unsigned long addr = reserve[i];
423 unsigned long len = reserve[i + 1];
425 page_set_flags(addr & TARGET_PAGE_MASK,
426 TARGET_PAGE_ALIGN(addr + len),
427 PAGE_RESERVED);
429 #endif
431 return pd + (index & (L2_SIZE - 1));
434 static inline PageDesc *page_find(tb_page_addr_t index)
436 return page_find_alloc(index, 0);
439 #if !defined(CONFIG_USER_ONLY)
440 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
442 PhysPageDesc *pd;
443 void **lp;
444 int i;
446 /* Level 1. Always allocated. */
447 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
449 /* Level 2..N-1. */
450 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
451 void **p = *lp;
452 if (p == NULL) {
453 if (!alloc) {
454 return NULL;
456 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
458 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
461 pd = *lp;
462 if (pd == NULL) {
463 int i;
465 if (!alloc) {
466 return NULL;
469 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
471 for (i = 0; i < L2_SIZE; i++) {
472 pd[i].phys_offset = IO_MEM_UNASSIGNED;
473 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
477 return pd + (index & (L2_SIZE - 1));
480 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
482 return phys_page_find_alloc(index, 0);
485 static void tlb_protect_code(ram_addr_t ram_addr);
486 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
487 target_ulong vaddr);
488 #define mmap_lock() do { } while(0)
489 #define mmap_unlock() do { } while(0)
490 #endif
492 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
494 #if defined(CONFIG_USER_ONLY)
495 /* Currently it is not recommended to allocate big chunks of data in
496 user mode. It will change when a dedicated libc will be used */
497 #define USE_STATIC_CODE_GEN_BUFFER
498 #endif
500 #ifdef USE_STATIC_CODE_GEN_BUFFER
501 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
502 __attribute__((aligned (CODE_GEN_ALIGN)));
503 #endif
505 static void code_gen_alloc(unsigned long tb_size)
507 #ifdef USE_STATIC_CODE_GEN_BUFFER
508 code_gen_buffer = static_code_gen_buffer;
509 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
510 map_exec(code_gen_buffer, code_gen_buffer_size);
511 #else
512 code_gen_buffer_size = tb_size;
513 if (code_gen_buffer_size == 0) {
514 #if defined(CONFIG_USER_ONLY)
515 /* in user mode, phys_ram_size is not meaningful */
516 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
517 #else
518 /* XXX: needs adjustments */
519 code_gen_buffer_size = (unsigned long)(ram_size / 4);
520 #endif
522 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
523 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
524 /* The code gen buffer location may have constraints depending on
525 the host cpu and OS */
526 #if defined(__linux__)
528 int flags;
529 void *start = NULL;
531 flags = MAP_PRIVATE | MAP_ANONYMOUS;
532 #if defined(__x86_64__)
533 flags |= MAP_32BIT;
534 /* Cannot map more than that */
535 if (code_gen_buffer_size > (800 * 1024 * 1024))
536 code_gen_buffer_size = (800 * 1024 * 1024);
537 #elif defined(__sparc_v9__)
538 // Map the buffer below 2G, so we can use direct calls and branches
539 flags |= MAP_FIXED;
540 start = (void *) 0x60000000UL;
541 if (code_gen_buffer_size > (512 * 1024 * 1024))
542 code_gen_buffer_size = (512 * 1024 * 1024);
543 #elif defined(__arm__)
544 /* Map the buffer below 32M, so we can use direct calls and branches */
545 flags |= MAP_FIXED;
546 start = (void *) 0x01000000UL;
547 if (code_gen_buffer_size > 16 * 1024 * 1024)
548 code_gen_buffer_size = 16 * 1024 * 1024;
549 #endif
550 code_gen_buffer = mmap(start, code_gen_buffer_size,
551 PROT_WRITE | PROT_READ | PROT_EXEC,
552 flags, -1, 0);
553 if (code_gen_buffer == MAP_FAILED) {
554 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
555 exit(1);
558 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
560 int flags;
561 void *addr = NULL;
562 flags = MAP_PRIVATE | MAP_ANONYMOUS;
563 #if defined(__x86_64__)
564 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
565 * 0x40000000 is free */
566 flags |= MAP_FIXED;
567 addr = (void *)0x40000000;
568 /* Cannot map more than that */
569 if (code_gen_buffer_size > (800 * 1024 * 1024))
570 code_gen_buffer_size = (800 * 1024 * 1024);
571 #endif
572 code_gen_buffer = mmap(addr, code_gen_buffer_size,
573 PROT_WRITE | PROT_READ | PROT_EXEC,
574 flags, -1, 0);
575 if (code_gen_buffer == MAP_FAILED) {
576 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
577 exit(1);
580 #else
581 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
582 map_exec(code_gen_buffer, code_gen_buffer_size);
583 #endif
584 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
585 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
586 code_gen_buffer_max_size = code_gen_buffer_size -
587 code_gen_max_block_size();
588 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
589 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
592 /* Must be called before using the QEMU cpus. 'tb_size' is the size
593 (in bytes) allocated to the translation buffer. Zero means default
594 size. */
595 void cpu_exec_init_all(unsigned long tb_size)
597 cpu_gen_init();
598 code_gen_alloc(tb_size);
599 code_gen_ptr = code_gen_buffer;
600 page_init();
601 #if !defined(CONFIG_USER_ONLY)
602 io_mem_init();
603 #endif
606 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
608 static int cpu_common_post_load(void *opaque, int version_id)
610 CPUState *env = opaque;
612 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
613 version_id is increased. */
614 env->interrupt_request &= ~0x01;
615 tlb_flush(env, 1);
617 return 0;
620 static const VMStateDescription vmstate_cpu_common = {
621 .name = "cpu_common",
622 .version_id = 1,
623 .minimum_version_id = 1,
624 .minimum_version_id_old = 1,
625 .post_load = cpu_common_post_load,
626 .fields = (VMStateField []) {
627 VMSTATE_UINT32(halted, CPUState),
628 VMSTATE_UINT32(interrupt_request, CPUState),
629 VMSTATE_END_OF_LIST()
632 #endif
634 CPUState *qemu_get_cpu(int cpu)
636 CPUState *env = first_cpu;
638 while (env) {
639 if (env->cpu_index == cpu)
640 break;
641 env = env->next_cpu;
644 return env;
647 void cpu_exec_init(CPUState *env)
649 CPUState **penv;
650 int cpu_index;
652 #if defined(CONFIG_USER_ONLY)
653 cpu_list_lock();
654 #endif
655 env->next_cpu = NULL;
656 penv = &first_cpu;
657 cpu_index = 0;
658 while (*penv != NULL) {
659 penv = &(*penv)->next_cpu;
660 cpu_index++;
662 env->cpu_index = cpu_index;
663 env->numa_node = 0;
664 QTAILQ_INIT(&env->breakpoints);
665 QTAILQ_INIT(&env->watchpoints);
666 *penv = env;
667 #if defined(CONFIG_USER_ONLY)
668 cpu_list_unlock();
669 #endif
670 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
671 vmstate_register(cpu_index, &vmstate_cpu_common, env);
672 register_savevm("cpu", cpu_index, CPU_SAVE_VERSION,
673 cpu_save, cpu_load, env);
674 #endif
677 static inline void invalidate_page_bitmap(PageDesc *p)
679 if (p->code_bitmap) {
680 qemu_free(p->code_bitmap);
681 p->code_bitmap = NULL;
683 p->code_write_count = 0;
686 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
688 static void page_flush_tb_1 (int level, void **lp)
690 int i;
692 if (*lp == NULL) {
693 return;
695 if (level == 0) {
696 PageDesc *pd = *lp;
697 for (i = 0; i < L2_SIZE; ++i) {
698 pd[i].first_tb = NULL;
699 invalidate_page_bitmap(pd + i);
701 } else {
702 void **pp = *lp;
703 for (i = 0; i < L2_SIZE; ++i) {
704 page_flush_tb_1 (level - 1, pp + i);
709 static void page_flush_tb(void)
711 int i;
712 for (i = 0; i < V_L1_SIZE; i++) {
713 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
717 /* flush all the translation blocks */
718 /* XXX: tb_flush is currently not thread safe */
719 void tb_flush(CPUState *env1)
721 CPUState *env;
722 #if defined(DEBUG_FLUSH)
723 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
724 (unsigned long)(code_gen_ptr - code_gen_buffer),
725 nb_tbs, nb_tbs > 0 ?
726 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
727 #endif
728 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
729 cpu_abort(env1, "Internal error: code buffer overflow\n");
731 nb_tbs = 0;
733 for(env = first_cpu; env != NULL; env = env->next_cpu) {
734 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
737 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
738 page_flush_tb();
740 code_gen_ptr = code_gen_buffer;
741 /* XXX: flush processor icache at this point if cache flush is
742 expensive */
743 tb_flush_count++;
746 #ifdef DEBUG_TB_CHECK
748 static void tb_invalidate_check(target_ulong address)
750 TranslationBlock *tb;
751 int i;
752 address &= TARGET_PAGE_MASK;
753 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
754 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
755 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
756 address >= tb->pc + tb->size)) {
757 printf("ERROR invalidate: address=" TARGET_FMT_lx
758 " PC=%08lx size=%04x\n",
759 address, (long)tb->pc, tb->size);
765 /* verify that all the pages have correct rights for code */
766 static void tb_page_check(void)
768 TranslationBlock *tb;
769 int i, flags1, flags2;
771 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
772 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
773 flags1 = page_get_flags(tb->pc);
774 flags2 = page_get_flags(tb->pc + tb->size - 1);
775 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
776 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
777 (long)tb->pc, tb->size, flags1, flags2);
783 #endif
785 /* invalidate one TB */
786 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
787 int next_offset)
789 TranslationBlock *tb1;
790 for(;;) {
791 tb1 = *ptb;
792 if (tb1 == tb) {
793 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
794 break;
796 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
800 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
802 TranslationBlock *tb1;
803 unsigned int n1;
805 for(;;) {
806 tb1 = *ptb;
807 n1 = (long)tb1 & 3;
808 tb1 = (TranslationBlock *)((long)tb1 & ~3);
809 if (tb1 == tb) {
810 *ptb = tb1->page_next[n1];
811 break;
813 ptb = &tb1->page_next[n1];
817 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
819 TranslationBlock *tb1, **ptb;
820 unsigned int n1;
822 ptb = &tb->jmp_next[n];
823 tb1 = *ptb;
824 if (tb1) {
825 /* find tb(n) in circular list */
826 for(;;) {
827 tb1 = *ptb;
828 n1 = (long)tb1 & 3;
829 tb1 = (TranslationBlock *)((long)tb1 & ~3);
830 if (n1 == n && tb1 == tb)
831 break;
832 if (n1 == 2) {
833 ptb = &tb1->jmp_first;
834 } else {
835 ptb = &tb1->jmp_next[n1];
838 /* now we can suppress tb(n) from the list */
839 *ptb = tb->jmp_next[n];
841 tb->jmp_next[n] = NULL;
845 /* reset the jump entry 'n' of a TB so that it is not chained to
846 another TB */
847 static inline void tb_reset_jump(TranslationBlock *tb, int n)
849 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
852 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
854 CPUState *env;
855 PageDesc *p;
856 unsigned int h, n1;
857 tb_page_addr_t phys_pc;
858 TranslationBlock *tb1, *tb2;
860 /* remove the TB from the hash list */
861 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
862 h = tb_phys_hash_func(phys_pc);
863 tb_remove(&tb_phys_hash[h], tb,
864 offsetof(TranslationBlock, phys_hash_next));
866 /* remove the TB from the page list */
867 if (tb->page_addr[0] != page_addr) {
868 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
869 tb_page_remove(&p->first_tb, tb);
870 invalidate_page_bitmap(p);
872 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
873 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
874 tb_page_remove(&p->first_tb, tb);
875 invalidate_page_bitmap(p);
878 tb_invalidated_flag = 1;
880 /* remove the TB from the hash list */
881 h = tb_jmp_cache_hash_func(tb->pc);
882 for(env = first_cpu; env != NULL; env = env->next_cpu) {
883 if (env->tb_jmp_cache[h] == tb)
884 env->tb_jmp_cache[h] = NULL;
887 /* suppress this TB from the two jump lists */
888 tb_jmp_remove(tb, 0);
889 tb_jmp_remove(tb, 1);
891 /* suppress any remaining jumps to this TB */
892 tb1 = tb->jmp_first;
893 for(;;) {
894 n1 = (long)tb1 & 3;
895 if (n1 == 2)
896 break;
897 tb1 = (TranslationBlock *)((long)tb1 & ~3);
898 tb2 = tb1->jmp_next[n1];
899 tb_reset_jump(tb1, n1);
900 tb1->jmp_next[n1] = NULL;
901 tb1 = tb2;
903 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
905 tb_phys_invalidate_count++;
/*
 * Set bits [start, start + len) of the bitmap 'tab' (bit i lives in
 * tab[i >> 3], at position i & 7).  Bits outside the range are preserved.
 *
 * A non-positive 'len' is a no-op.  The caller must make sure the bitmap
 * is large enough for the range.
 */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    /* An empty or negative range must not touch the bitmap; without this
       guard a negative len crossing a byte boundary would set bits. */
    if (len <= 0) {
        return;
    }

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* range lies inside a single byte */
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        /* partial first byte */
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        /* whole bytes in the middle */
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        /* partial last byte */
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
935 static void build_page_bitmap(PageDesc *p)
937 int n, tb_start, tb_end;
938 TranslationBlock *tb;
940 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
942 tb = p->first_tb;
943 while (tb != NULL) {
944 n = (long)tb & 3;
945 tb = (TranslationBlock *)((long)tb & ~3);
946 /* NOTE: this is subtle as a TB may span two physical pages */
947 if (n == 0) {
948 /* NOTE: tb_end may be after the end of the page, but
949 it is not a problem */
950 tb_start = tb->pc & ~TARGET_PAGE_MASK;
951 tb_end = tb_start + tb->size;
952 if (tb_end > TARGET_PAGE_SIZE)
953 tb_end = TARGET_PAGE_SIZE;
954 } else {
955 tb_start = 0;
956 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
958 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
959 tb = tb->page_next[n];
963 TranslationBlock *tb_gen_code(CPUState *env,
964 target_ulong pc, target_ulong cs_base,
965 int flags, int cflags)
967 TranslationBlock *tb;
968 uint8_t *tc_ptr;
969 tb_page_addr_t phys_pc, phys_page2;
970 target_ulong virt_page2;
971 int code_gen_size;
973 phys_pc = get_page_addr_code(env, pc);
974 tb = tb_alloc(pc);
975 if (!tb) {
976 /* flush must be done */
977 tb_flush(env);
978 /* cannot fail at this point */
979 tb = tb_alloc(pc);
980 /* Don't forget to invalidate previous TB info. */
981 tb_invalidated_flag = 1;
983 tc_ptr = code_gen_ptr;
984 tb->tc_ptr = tc_ptr;
985 tb->cs_base = cs_base;
986 tb->flags = flags;
987 tb->cflags = cflags;
988 cpu_gen_code(env, tb, &code_gen_size);
989 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
991 /* check next page if needed */
992 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
993 phys_page2 = -1;
994 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
995 phys_page2 = get_page_addr_code(env, virt_page2);
997 tb_link_page(tb, phys_pc, phys_page2);
998 return tb;
1001 /* invalidate all TBs which intersect with the target physical page
1002 starting in range [start;end[. NOTE: start and end must refer to
1003 the same physical page. 'is_cpu_write_access' should be true if called
1004 from a real cpu write access: the virtual CPU will exit the current
1005 TB if code is modified inside this TB. */
1006 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1007 int is_cpu_write_access)
1009 TranslationBlock *tb, *tb_next, *saved_tb;
1010 CPUState *env = cpu_single_env;
1011 tb_page_addr_t tb_start, tb_end;
1012 PageDesc *p;
1013 int n;
1014 #ifdef TARGET_HAS_PRECISE_SMC
1015 int current_tb_not_found = is_cpu_write_access;
1016 TranslationBlock *current_tb = NULL;
1017 int current_tb_modified = 0;
1018 target_ulong current_pc = 0;
1019 target_ulong current_cs_base = 0;
1020 int current_flags = 0;
1021 #endif /* TARGET_HAS_PRECISE_SMC */
1023 p = page_find(start >> TARGET_PAGE_BITS);
1024 if (!p)
1025 return;
1026 if (!p->code_bitmap &&
1027 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1028 is_cpu_write_access) {
1029 /* build code bitmap */
1030 build_page_bitmap(p);
1033 /* we remove all the TBs in the range [start, end[ */
1034 /* XXX: see if in some cases it could be faster to invalidate all the code */
1035 tb = p->first_tb;
1036 while (tb != NULL) {
1037 n = (long)tb & 3;
1038 tb = (TranslationBlock *)((long)tb & ~3);
1039 tb_next = tb->page_next[n];
1040 /* NOTE: this is subtle as a TB may span two physical pages */
1041 if (n == 0) {
1042 /* NOTE: tb_end may be after the end of the page, but
1043 it is not a problem */
1044 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1045 tb_end = tb_start + tb->size;
1046 } else {
1047 tb_start = tb->page_addr[1];
1048 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1050 if (!(tb_end <= start || tb_start >= end)) {
1051 #ifdef TARGET_HAS_PRECISE_SMC
1052 if (current_tb_not_found) {
1053 current_tb_not_found = 0;
1054 current_tb = NULL;
1055 if (env->mem_io_pc) {
1056 /* now we have a real cpu fault */
1057 current_tb = tb_find_pc(env->mem_io_pc);
1060 if (current_tb == tb &&
1061 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1062 /* If we are modifying the current TB, we must stop
1063 its execution. We could be more precise by checking
1064 that the modification is after the current PC, but it
1065 would require a specialized function to partially
1066 restore the CPU state */
1068 current_tb_modified = 1;
1069 cpu_restore_state(current_tb, env,
1070 env->mem_io_pc, NULL);
1071 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1072 &current_flags);
1074 #endif /* TARGET_HAS_PRECISE_SMC */
1075 /* we need to do that to handle the case where a signal
1076 occurs while doing tb_phys_invalidate() */
1077 saved_tb = NULL;
1078 if (env) {
1079 saved_tb = env->current_tb;
1080 env->current_tb = NULL;
1082 tb_phys_invalidate(tb, -1);
1083 if (env) {
1084 env->current_tb = saved_tb;
1085 if (env->interrupt_request && env->current_tb)
1086 cpu_interrupt(env, env->interrupt_request);
1089 tb = tb_next;
1091 #if !defined(CONFIG_USER_ONLY)
1092 /* if no code remaining, no need to continue to use slow writes */
1093 if (!p->first_tb) {
1094 invalidate_page_bitmap(p);
1095 if (is_cpu_write_access) {
1096 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1099 #endif
1100 #ifdef TARGET_HAS_PRECISE_SMC
1101 if (current_tb_modified) {
1102 /* we generate a block containing just the instruction
1103 modifying the memory. It will ensure that it cannot modify
1104 itself */
1105 env->current_tb = NULL;
1106 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1107 cpu_resume_from_signal(env, NULL);
1109 #endif
1112 /* len must be <= 8 and start must be a multiple of len */
1113 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1115 PageDesc *p;
1116 int offset, b;
1117 #if 0
1118 if (1) {
1119 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1120 cpu_single_env->mem_io_vaddr, len,
1121 cpu_single_env->eip,
1122 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1124 #endif
1125 p = page_find(start >> TARGET_PAGE_BITS);
1126 if (!p)
1127 return;
1128 if (p->code_bitmap) {
1129 offset = start & ~TARGET_PAGE_MASK;
1130 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1131 if (b & ((1 << len) - 1))
1132 goto do_invalidate;
1133 } else {
1134 do_invalidate:
1135 tb_invalidate_phys_page_range(start, start + len, 1);
1139 #if !defined(CONFIG_SOFTMMU)
1140 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1141 unsigned long pc, void *puc)
1143 TranslationBlock *tb;
1144 PageDesc *p;
1145 int n;
1146 #ifdef TARGET_HAS_PRECISE_SMC
1147 TranslationBlock *current_tb = NULL;
1148 CPUState *env = cpu_single_env;
1149 int current_tb_modified = 0;
1150 target_ulong current_pc = 0;
1151 target_ulong current_cs_base = 0;
1152 int current_flags = 0;
1153 #endif
1155 addr &= TARGET_PAGE_MASK;
1156 p = page_find(addr >> TARGET_PAGE_BITS);
1157 if (!p)
1158 return;
1159 tb = p->first_tb;
1160 #ifdef TARGET_HAS_PRECISE_SMC
1161 if (tb && pc != 0) {
1162 current_tb = tb_find_pc(pc);
1164 #endif
1165 while (tb != NULL) {
1166 n = (long)tb & 3;
1167 tb = (TranslationBlock *)((long)tb & ~3);
1168 #ifdef TARGET_HAS_PRECISE_SMC
1169 if (current_tb == tb &&
1170 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1171 /* If we are modifying the current TB, we must stop
1172 its execution. We could be more precise by checking
1173 that the modification is after the current PC, but it
1174 would require a specialized function to partially
1175 restore the CPU state */
1177 current_tb_modified = 1;
1178 cpu_restore_state(current_tb, env, pc, puc);
1179 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1180 &current_flags);
1182 #endif /* TARGET_HAS_PRECISE_SMC */
1183 tb_phys_invalidate(tb, addr);
1184 tb = tb->page_next[n];
1186 p->first_tb = NULL;
1187 #ifdef TARGET_HAS_PRECISE_SMC
1188 if (current_tb_modified) {
1189 /* we generate a block containing just the instruction
1190 modifying the memory. It will ensure that it cannot modify
1191 itself */
1192 env->current_tb = NULL;
1193 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1194 cpu_resume_from_signal(env, puc);
1196 #endif
1198 #endif
1200 /* add the tb in the target page and protect it if necessary */
1201 static inline void tb_alloc_page(TranslationBlock *tb,
1202 unsigned int n, tb_page_addr_t page_addr)
1204 PageDesc *p;
1205 TranslationBlock *last_first_tb;
1207 tb->page_addr[n] = page_addr;
1208 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1209 tb->page_next[n] = p->first_tb;
1210 last_first_tb = p->first_tb;
1211 p->first_tb = (TranslationBlock *)((long)tb | n);
1212 invalidate_page_bitmap(p);
1214 #if defined(TARGET_HAS_SMC) || 1
1216 #if defined(CONFIG_USER_ONLY)
1217 if (p->flags & PAGE_WRITE) {
1218 target_ulong addr;
1219 PageDesc *p2;
1220 int prot;
1222 /* force the host page as non writable (writes will have a
1223 page fault + mprotect overhead) */
1224 page_addr &= qemu_host_page_mask;
1225 prot = 0;
1226 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1227 addr += TARGET_PAGE_SIZE) {
1229 p2 = page_find (addr >> TARGET_PAGE_BITS);
1230 if (!p2)
1231 continue;
1232 prot |= p2->flags;
1233 p2->flags &= ~PAGE_WRITE;
1235 mprotect(g2h(page_addr), qemu_host_page_size,
1236 (prot & PAGE_BITS) & ~PAGE_WRITE);
1237 #ifdef DEBUG_TB_INVALIDATE
1238 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1239 page_addr);
1240 #endif
1242 #else
1243 /* if some code is already present, then the pages are already
1244 protected. So we handle the case where only the first TB is
1245 allocated in a physical page */
1246 if (!last_first_tb) {
1247 tlb_protect_code(page_addr);
1249 #endif
1251 #endif /* TARGET_HAS_SMC */
1254 /* Allocate a new translation block. Flush the translation buffer if
1255 too many translation blocks or too much generated code. */
1256 TranslationBlock *tb_alloc(target_ulong pc)
1258 TranslationBlock *tb;
1260 if (nb_tbs >= code_gen_max_blocks ||
1261 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
1262 return NULL;
1263 tb = &tbs[nb_tbs++];
1264 tb->pc = pc;
1265 tb->cflags = 0;
1266 return tb;
1269 void tb_free(TranslationBlock *tb)
1271 /* In practice this is mostly used for single use temporary TB
1272 Ignore the hard cases and just back up if this TB happens to
1273 be the last one generated. */
1274 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
1275 code_gen_ptr = tb->tc_ptr;
1276 nb_tbs--;
1280 /* add a new TB and link it to the physical page tables. phys_page2 is
1281 (-1) to indicate that only one page contains the TB. */
1282 void tb_link_page(TranslationBlock *tb,
1283 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1285 unsigned int h;
1286 TranslationBlock **ptb;
1288 /* Grab the mmap lock to stop another thread invalidating this TB
1289 before we are done. */
1290 mmap_lock();
1291 /* add in the physical hash table */
1292 h = tb_phys_hash_func(phys_pc);
1293 ptb = &tb_phys_hash[h];
1294 tb->phys_hash_next = *ptb;
1295 *ptb = tb;
1297 /* add in the page list */
1298 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1299 if (phys_page2 != -1)
1300 tb_alloc_page(tb, 1, phys_page2);
1301 else
1302 tb->page_addr[1] = -1;
1304 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1305 tb->jmp_next[0] = NULL;
1306 tb->jmp_next[1] = NULL;
1308 /* init original jump addresses */
1309 if (tb->tb_next_offset[0] != 0xffff)
1310 tb_reset_jump(tb, 0);
1311 if (tb->tb_next_offset[1] != 0xffff)
1312 tb_reset_jump(tb, 1);
1314 #ifdef DEBUG_TB_CHECK
1315 tb_page_check();
1316 #endif
1317 mmap_unlock();
1320 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1321 tb[1].tc_ptr. Return NULL if not found */
1322 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1324 int m_min, m_max, m;
1325 unsigned long v;
1326 TranslationBlock *tb;
1328 if (nb_tbs <= 0)
1329 return NULL;
1330 if (tc_ptr < (unsigned long)code_gen_buffer ||
1331 tc_ptr >= (unsigned long)code_gen_ptr)
1332 return NULL;
1333 /* binary search (cf Knuth) */
1334 m_min = 0;
1335 m_max = nb_tbs - 1;
1336 while (m_min <= m_max) {
1337 m = (m_min + m_max) >> 1;
1338 tb = &tbs[m];
1339 v = (unsigned long)tb->tc_ptr;
1340 if (v == tc_ptr)
1341 return tb;
1342 else if (tc_ptr < v) {
1343 m_max = m - 1;
1344 } else {
1345 m_min = m + 1;
1348 return &tbs[m_max];
1351 static void tb_reset_jump_recursive(TranslationBlock *tb);
1353 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1355 TranslationBlock *tb1, *tb_next, **ptb;
1356 unsigned int n1;
1358 tb1 = tb->jmp_next[n];
1359 if (tb1 != NULL) {
1360 /* find head of list */
1361 for(;;) {
1362 n1 = (long)tb1 & 3;
1363 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1364 if (n1 == 2)
1365 break;
1366 tb1 = tb1->jmp_next[n1];
1368 /* we are now sure now that tb jumps to tb1 */
1369 tb_next = tb1;
1371 /* remove tb from the jmp_first list */
1372 ptb = &tb_next->jmp_first;
1373 for(;;) {
1374 tb1 = *ptb;
1375 n1 = (long)tb1 & 3;
1376 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1377 if (n1 == n && tb1 == tb)
1378 break;
1379 ptb = &tb1->jmp_next[n1];
1381 *ptb = tb->jmp_next[n];
1382 tb->jmp_next[n] = NULL;
1384 /* suppress the jump to next tb in generated code */
1385 tb_reset_jump(tb, n);
1387 /* suppress jumps in the tb on which we could have jumped */
1388 tb_reset_jump_recursive(tb_next);
1392 static void tb_reset_jump_recursive(TranslationBlock *tb)
1394 tb_reset_jump_recursive2(tb, 0);
1395 tb_reset_jump_recursive2(tb, 1);
1398 #if defined(TARGET_HAS_ICE)
1399 #if defined(CONFIG_USER_ONLY)
1400 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1402 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1404 #else
1405 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1407 target_phys_addr_t addr;
1408 target_ulong pd;
1409 ram_addr_t ram_addr;
1410 PhysPageDesc *p;
1412 addr = cpu_get_phys_page_debug(env, pc);
1413 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1414 if (!p) {
1415 pd = IO_MEM_UNASSIGNED;
1416 } else {
1417 pd = p->phys_offset;
1419 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1420 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1422 #endif
1423 #endif /* TARGET_HAS_ICE */
1425 #if defined(CONFIG_USER_ONLY)
1426 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1431 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1432 int flags, CPUWatchpoint **watchpoint)
1434 return -ENOSYS;
1436 #else
1437 /* Add a watchpoint. */
1438 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1439 int flags, CPUWatchpoint **watchpoint)
1441 target_ulong len_mask = ~(len - 1);
1442 CPUWatchpoint *wp;
1444 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1445 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1446 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1447 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1448 return -EINVAL;
1450 wp = qemu_malloc(sizeof(*wp));
1452 wp->vaddr = addr;
1453 wp->len_mask = len_mask;
1454 wp->flags = flags;
1456 /* keep all GDB-injected watchpoints in front */
1457 if (flags & BP_GDB)
1458 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1459 else
1460 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1462 tlb_flush_page(env, addr);
1464 if (watchpoint)
1465 *watchpoint = wp;
1466 return 0;
1469 /* Remove a specific watchpoint. */
1470 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1471 int flags)
1473 target_ulong len_mask = ~(len - 1);
1474 CPUWatchpoint *wp;
1476 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1477 if (addr == wp->vaddr && len_mask == wp->len_mask
1478 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1479 cpu_watchpoint_remove_by_ref(env, wp);
1480 return 0;
1483 return -ENOENT;
1486 /* Remove a specific watchpoint by reference. */
1487 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1489 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1491 tlb_flush_page(env, watchpoint->vaddr);
1493 qemu_free(watchpoint);
1496 /* Remove all matching watchpoints. */
1497 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1499 CPUWatchpoint *wp, *next;
1501 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1502 if (wp->flags & mask)
1503 cpu_watchpoint_remove_by_ref(env, wp);
1506 #endif
1508 /* Add a breakpoint. */
1509 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1510 CPUBreakpoint **breakpoint)
1512 #if defined(TARGET_HAS_ICE)
1513 CPUBreakpoint *bp;
1515 bp = qemu_malloc(sizeof(*bp));
1517 bp->pc = pc;
1518 bp->flags = flags;
1520 /* keep all GDB-injected breakpoints in front */
1521 if (flags & BP_GDB)
1522 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1523 else
1524 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1526 breakpoint_invalidate(env, pc);
1528 if (breakpoint)
1529 *breakpoint = bp;
1530 return 0;
1531 #else
1532 return -ENOSYS;
1533 #endif
1536 /* Remove a specific breakpoint. */
1537 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1539 #if defined(TARGET_HAS_ICE)
1540 CPUBreakpoint *bp;
1542 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1543 if (bp->pc == pc && bp->flags == flags) {
1544 cpu_breakpoint_remove_by_ref(env, bp);
1545 return 0;
1548 return -ENOENT;
1549 #else
1550 return -ENOSYS;
1551 #endif
1554 /* Remove a specific breakpoint by reference. */
1555 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1557 #if defined(TARGET_HAS_ICE)
1558 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1560 breakpoint_invalidate(env, breakpoint->pc);
1562 qemu_free(breakpoint);
1563 #endif
1566 /* Remove all matching breakpoints. */
1567 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1569 #if defined(TARGET_HAS_ICE)
1570 CPUBreakpoint *bp, *next;
1572 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1573 if (bp->flags & mask)
1574 cpu_breakpoint_remove_by_ref(env, bp);
1576 #endif
1579 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1580 CPU loop after each instruction */
1581 void cpu_single_step(CPUState *env, int enabled)
1583 #if defined(TARGET_HAS_ICE)
1584 if (env->singlestep_enabled != enabled) {
1585 env->singlestep_enabled = enabled;
1586 if (kvm_enabled())
1587 kvm_update_guest_debug(env, 0);
1588 else {
1589 /* must flush all the translated code to avoid inconsistencies */
1590 /* XXX: only flush what is necessary */
1591 tb_flush(env);
1594 #endif
1597 /* enable or disable low levels log */
1598 void cpu_set_log(int log_flags)
1600 loglevel = log_flags;
1601 if (loglevel && !logfile) {
1602 logfile = fopen(logfilename, log_append ? "a" : "w");
1603 if (!logfile) {
1604 perror(logfilename);
1605 _exit(1);
1607 #if !defined(CONFIG_SOFTMMU)
1608 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1610 static char logfile_buf[4096];
1611 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1613 #elif !defined(_WIN32)
1614 /* Win32 doesn't support line-buffering and requires size >= 2 */
1615 setvbuf(logfile, NULL, _IOLBF, 0);
1616 #endif
1617 log_append = 1;
1619 if (!loglevel && logfile) {
1620 fclose(logfile);
1621 logfile = NULL;
1625 void cpu_set_log_filename(const char *filename)
1627 logfilename = strdup(filename);
1628 if (logfile) {
1629 fclose(logfile);
1630 logfile = NULL;
1632 cpu_set_log(loglevel);
1635 static void cpu_unlink_tb(CPUState *env)
1637 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1638 problem and hope the cpu will stop of its own accord. For userspace
1639 emulation this often isn't actually as bad as it sounds. Often
1640 signals are used primarily to interrupt blocking syscalls. */
1641 TranslationBlock *tb;
1642 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1644 spin_lock(&interrupt_lock);
1645 tb = env->current_tb;
1646 /* if the cpu is currently executing code, we must unlink it and
1647 all the potentially executing TB */
1648 if (tb) {
1649 env->current_tb = NULL;
1650 tb_reset_jump_recursive(tb);
1652 spin_unlock(&interrupt_lock);
1655 /* mask must never be zero, except for A20 change call */
1656 void cpu_interrupt(CPUState *env, int mask)
1658 int old_mask;
1660 old_mask = env->interrupt_request;
1661 env->interrupt_request |= mask;
1663 #ifndef CONFIG_USER_ONLY
1665 * If called from iothread context, wake the target cpu in
1666 * case its halted.
1668 if (!qemu_cpu_self(env)) {
1669 qemu_cpu_kick(env);
1670 return;
1672 #endif
1674 if (use_icount) {
1675 env->icount_decr.u16.high = 0xffff;
1676 #ifndef CONFIG_USER_ONLY
1677 if (!can_do_io(env)
1678 && (mask & ~old_mask) != 0) {
1679 cpu_abort(env, "Raised interrupt while not in I/O function");
1681 #endif
1682 } else {
1683 cpu_unlink_tb(env);
1687 void cpu_reset_interrupt(CPUState *env, int mask)
1689 env->interrupt_request &= ~mask;
1692 void cpu_exit(CPUState *env)
1694 env->exit_request = 1;
1695 cpu_unlink_tb(env);
/* Table of the available log categories.  Each entry is
   { mask bit, name, help text }.  The entry with a zero mask ends the
   table; cpu_str_to_log_mask() relies on it to stop scanning.  Some
   entries only exist for TARGET_I386 or with DEBUG_IOPORT. */
1698 const CPULogItem cpu_log_items[] = {
1699 { CPU_LOG_TB_OUT_ASM, "out_asm",
1700 "show generated host assembly code for each compiled TB" },
1701 { CPU_LOG_TB_IN_ASM, "in_asm",
1702 "show target assembly code for each compiled TB" },
1703 { CPU_LOG_TB_OP, "op",
1704 "show micro ops for each compiled TB" },
1705 { CPU_LOG_TB_OP_OPT, "op_opt",
1706 "show micro ops "
1707 #ifdef TARGET_I386
1708 "before eflags optimization and "
1709 #endif
1710 "after liveness analysis" },
1711 { CPU_LOG_INT, "int",
1712 "show interrupts/exceptions in short format" },
1713 { CPU_LOG_EXEC, "exec",
1714 "show trace before each executed TB (lots of logs)" },
1715 { CPU_LOG_TB_CPU, "cpu",
1716 "show CPU state before block translation" },
1717 #ifdef TARGET_I386
1718 { CPU_LOG_PCALL, "pcall",
1719 "show protected mode far calls/returns/exceptions" },
1720 { CPU_LOG_RESET, "cpu_reset",
1721 "show CPU state before CPU resets" },
1722 #endif
1723 #ifdef DEBUG_IOPORT
1724 { CPU_LOG_IOPORT, "ioport",
1725 "show all i/o ports accesses" },
1726 #endif
1727 { 0, NULL, NULL },
1730 #ifndef CONFIG_USER_ONLY
/* List of the registered CPUPhysMemoryClient objects.  Entries are added
   by cpu_register_phys_memory_client() and walked by the cpu_notify_*()
   helpers. */
1731 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1732 = QLIST_HEAD_INITIALIZER(memory_client_list);
1734 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1735 ram_addr_t size,
1736 ram_addr_t phys_offset)
1738 CPUPhysMemoryClient *client;
1739 QLIST_FOREACH(client, &memory_client_list, list) {
1740 client->set_memory(client, start_addr, size, phys_offset);
1744 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1745 target_phys_addr_t end)
1747 CPUPhysMemoryClient *client;
1748 QLIST_FOREACH(client, &memory_client_list, list) {
1749 int r = client->sync_dirty_bitmap(client, start, end);
1750 if (r < 0)
1751 return r;
1753 return 0;
1756 static int cpu_notify_migration_log(int enable)
1758 CPUPhysMemoryClient *client;
1759 QLIST_FOREACH(client, &memory_client_list, list) {
1760 int r = client->migration_log(client, enable);
1761 if (r < 0)
1762 return r;
1764 return 0;
1767 static void phys_page_for_each_1(CPUPhysMemoryClient *client,
1768 int level, void **lp)
1770 int i;
1772 if (*lp == NULL) {
1773 return;
1775 if (level == 0) {
1776 PhysPageDesc *pd = *lp;
1777 for (i = 0; i < L2_SIZE; ++i) {
1778 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1779 client->set_memory(client, pd[i].region_offset,
1780 TARGET_PAGE_SIZE, pd[i].phys_offset);
1783 } else {
1784 void **pp = *lp;
1785 for (i = 0; i < L2_SIZE; ++i) {
1786 phys_page_for_each_1(client, level - 1, pp + i);
1791 static void phys_page_for_each(CPUPhysMemoryClient *client)
1793 int i;
1794 for (i = 0; i < P_L1_SIZE; ++i) {
1795 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1796 l1_phys_map + 1);
1800 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1802 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1803 phys_page_for_each(client);
1806 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1808 QLIST_REMOVE(client, list);
1810 #endif
/* Return 1 when the first 'n' bytes of 's1' are exactly the string 's2'
   (i.e. strlen(s2) == n and the bytes match), 0 otherwise. */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1819 /* takes a comma separated list of log masks. Return 0 if error. */
1820 int cpu_str_to_log_mask(const char *str)
1822 const CPULogItem *item;
1823 int mask;
1824 const char *p, *p1;
1826 p = str;
1827 mask = 0;
1828 for(;;) {
1829 p1 = strchr(p, ',');
1830 if (!p1)
1831 p1 = p + strlen(p);
1832 if(cmp1(p,p1-p,"all")) {
1833 for(item = cpu_log_items; item->mask != 0; item++) {
1834 mask |= item->mask;
1836 } else {
1837 for(item = cpu_log_items; item->mask != 0; item++) {
1838 if (cmp1(p, p1 - p, item->name))
1839 goto found;
1841 return 0;
1843 found:
1844 mask |= item->mask;
1845 if (*p1 != ',')
1846 break;
1847 p = p1 + 1;
1849 return mask;
1852 void cpu_abort(CPUState *env, const char *fmt, ...)
1854 va_list ap;
1855 va_list ap2;
1857 va_start(ap, fmt);
1858 va_copy(ap2, ap);
1859 fprintf(stderr, "qemu: fatal: ");
1860 vfprintf(stderr, fmt, ap);
1861 fprintf(stderr, "\n");
1862 #ifdef TARGET_I386
1863 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1864 #else
1865 cpu_dump_state(env, stderr, fprintf, 0);
1866 #endif
1867 if (qemu_log_enabled()) {
1868 qemu_log("qemu: fatal: ");
1869 qemu_log_vprintf(fmt, ap2);
1870 qemu_log("\n");
1871 #ifdef TARGET_I386
1872 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1873 #else
1874 log_cpu_state(env, 0);
1875 #endif
1876 qemu_log_flush();
1877 qemu_log_close();
1879 va_end(ap2);
1880 va_end(ap);
1881 #if defined(CONFIG_USER_ONLY)
1883 struct sigaction act;
1884 sigfillset(&act.sa_mask);
1885 act.sa_handler = SIG_DFL;
1886 sigaction(SIGABRT, &act, NULL);
1888 #endif
1889 abort();
1892 CPUState *cpu_copy(CPUState *env)
1894 CPUState *new_env = cpu_init(env->cpu_model_str);
1895 CPUState *next_cpu = new_env->next_cpu;
1896 int cpu_index = new_env->cpu_index;
1897 #if defined(TARGET_HAS_ICE)
1898 CPUBreakpoint *bp;
1899 CPUWatchpoint *wp;
1900 #endif
1902 memcpy(new_env, env, sizeof(CPUState));
1904 /* Preserve chaining and index. */
1905 new_env->next_cpu = next_cpu;
1906 new_env->cpu_index = cpu_index;
1908 /* Clone all break/watchpoints.
1909 Note: Once we support ptrace with hw-debug register access, make sure
1910 BP_CPU break/watchpoints are handled correctly on clone. */
1911 QTAILQ_INIT(&env->breakpoints);
1912 QTAILQ_INIT(&env->watchpoints);
1913 #if defined(TARGET_HAS_ICE)
1914 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1915 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1917 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1918 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1919 wp->flags, NULL);
1921 #endif
1923 return new_env;
1926 #if !defined(CONFIG_USER_ONLY)
1928 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1930 unsigned int i;
1932 /* Discard jump cache entries for any tb which might potentially
1933 overlap the flushed page. */
1934 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1935 memset (&env->tb_jmp_cache[i], 0,
1936 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1938 i = tb_jmp_cache_hash_page(addr);
1939 memset (&env->tb_jmp_cache[i], 0,
1940 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
/* Template for an unused TLB entry: every field is set to -1.  The TLB
   flush routines copy it over the entries they drop. */
1943 static CPUTLBEntry s_cputlb_empty_entry = {
1944 .addr_read = -1,
1945 .addr_write = -1,
1946 .addr_code = -1,
1947 .addend = -1,
1950 /* NOTE: if flush_global is true, also flush global entries (not
1951 implemented yet) */
1952 void tlb_flush(CPUState *env, int flush_global)
1954 int i;
1956 #if defined(DEBUG_TLB)
1957 printf("tlb_flush:\n");
1958 #endif
1959 /* must reset current TB so that interrupts cannot modify the
1960 links while we are modifying them */
1961 env->current_tb = NULL;
1963 for(i = 0; i < CPU_TLB_SIZE; i++) {
1964 int mmu_idx;
1965 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1966 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1970 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1972 env->tlb_flush_addr = -1;
1973 env->tlb_flush_mask = 0;
1974 tlb_flush_count++;
1977 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1979 if (addr == (tlb_entry->addr_read &
1980 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1981 addr == (tlb_entry->addr_write &
1982 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1983 addr == (tlb_entry->addr_code &
1984 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1985 *tlb_entry = s_cputlb_empty_entry;
1989 void tlb_flush_page(CPUState *env, target_ulong addr)
1991 int i;
1992 int mmu_idx;
1994 #if defined(DEBUG_TLB)
1995 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1996 #endif
1997 /* Check if we need to flush due to large pages. */
1998 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1999 #if defined(DEBUG_TLB)
2000 printf("tlb_flush_page: forced full flush ("
2001 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2002 env->tlb_flush_addr, env->tlb_flush_mask);
2003 #endif
2004 tlb_flush(env, 1);
2005 return;
2007 /* must reset current TB so that interrupts cannot modify the
2008 links while we are modifying them */
2009 env->current_tb = NULL;
2011 addr &= TARGET_PAGE_MASK;
2012 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2013 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2014 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2016 tlb_flush_jmp_cache(env, addr);
2019 /* update the TLBs so that writes to code in the virtual page 'addr'
2020 can be detected */
2021 static void tlb_protect_code(ram_addr_t ram_addr)
2023 cpu_physical_memory_reset_dirty(ram_addr,
2024 ram_addr + TARGET_PAGE_SIZE,
2025 CODE_DIRTY_FLAG);
2028 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2029 tested for self modifying code */
2030 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2031 target_ulong vaddr)
2033 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2036 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2037 unsigned long start, unsigned long length)
2039 unsigned long addr;
2040 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2041 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2042 if ((addr - start) < length) {
2043 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2048 /* Note: start and end must be within the same ram block. */
2049 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2050 int dirty_flags)
2052 CPUState *env;
2053 unsigned long length, start1;
2054 int i;
2056 start &= TARGET_PAGE_MASK;
2057 end = TARGET_PAGE_ALIGN(end);
2059 length = end - start;
2060 if (length == 0)
2061 return;
2062 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2064 /* we modify the TLB cache so that the dirty bit will be set again
2065 when accessing the range */
2066 start1 = (unsigned long)qemu_get_ram_ptr(start);
2067 /* Chek that we don't span multiple blocks - this breaks the
2068 address comparisons below. */
2069 if ((unsigned long)qemu_get_ram_ptr(end - 1) - start1
2070 != (end - 1) - start) {
2071 abort();
2074 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2075 int mmu_idx;
2076 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2077 for(i = 0; i < CPU_TLB_SIZE; i++)
2078 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2079 start1, length);
2084 int cpu_physical_memory_set_dirty_tracking(int enable)
2086 int ret = 0;
2087 in_migration = enable;
2088 ret = cpu_notify_migration_log(!!enable);
2089 return ret;
2092 int cpu_physical_memory_get_dirty_tracking(void)
2094 return in_migration;
2097 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2098 target_phys_addr_t end_addr)
2100 int ret;
2102 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2103 return ret;
2106 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2108 ram_addr_t ram_addr;
2109 void *p;
2111 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2112 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2113 + tlb_entry->addend);
2114 ram_addr = qemu_ram_addr_from_host(p);
2115 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2116 tlb_entry->addr_write |= TLB_NOTDIRTY;
2121 /* update the TLB according to the current state of the dirty bits */
2122 void cpu_tlb_update_dirty(CPUState *env)
2124 int i;
2125 int mmu_idx;
2126 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2127 for(i = 0; i < CPU_TLB_SIZE; i++)
2128 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2132 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2134 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2135 tlb_entry->addr_write = vaddr;
2138 /* update the TLB corresponding to virtual page vaddr
2139 so that it is no longer dirty */
2140 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2142 int i;
2143 int mmu_idx;
2145 vaddr &= TARGET_PAGE_MASK;
2146 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2147 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2148 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2151 /* Our TLB does not support large pages, so remember the area covered by
2152 large pages and trigger a full TLB flush if these are invalidated. */
2153 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2154 target_ulong size)
2156 target_ulong mask = ~(size - 1);
2158 if (env->tlb_flush_addr == (target_ulong)-1) {
2159 env->tlb_flush_addr = vaddr & mask;
2160 env->tlb_flush_mask = mask;
2161 return;
2163 /* Extend the existing region to include the new page.
2164 This is a compromise between unnecessary flushes and the cost
2165 of maintaining a full variable size TLB. */
2166 mask &= env->tlb_flush_mask;
2167 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2168 mask <<= 1;
2170 env->tlb_flush_addr &= mask;
2171 env->tlb_flush_mask = mask;
2174 /* Add a new TLB entry. At most one entry for a given virtual address
2175 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2176 supplied size is only used by tlb_flush_page. */
2177 void tlb_set_page(CPUState *env, target_ulong vaddr,
2178 target_phys_addr_t paddr, int prot,
2179 int mmu_idx, target_ulong size)
2181 PhysPageDesc *p;
2182 unsigned long pd;
2183 unsigned int index;
2184 target_ulong address;
2185 target_ulong code_address;
2186 unsigned long addend;
2187 CPUTLBEntry *te;
2188 CPUWatchpoint *wp;
2189 target_phys_addr_t iotlb;
2191 assert(size >= TARGET_PAGE_SIZE);
2192 if (size != TARGET_PAGE_SIZE) {
2193 tlb_add_large_page(env, vaddr, size);
2195 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2196 if (!p) {
2197 pd = IO_MEM_UNASSIGNED;
2198 } else {
2199 pd = p->phys_offset;
2201 #if defined(DEBUG_TLB)
2202 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x idx=%d smmu=%d pd=0x%08lx\n",
2203 vaddr, (int)paddr, prot, mmu_idx, is_softmmu, pd);
2204 #endif
2206 address = vaddr;
2207 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2208 /* IO memory case (romd handled later) */
2209 address |= TLB_MMIO;
2211 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2212 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2213 /* Normal RAM. */
2214 iotlb = pd & TARGET_PAGE_MASK;
2215 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2216 iotlb |= IO_MEM_NOTDIRTY;
2217 else
2218 iotlb |= IO_MEM_ROM;
2219 } else {
2220 /* IO handlers are currently passed a physical address.
2221 It would be nice to pass an offset from the base address
2222 of that region. This would avoid having to special case RAM,
2223 and avoid full address decoding in every device.
2224 We can't use the high bits of pd for this because
2225 IO_MEM_ROMD uses these as a ram address. */
2226 iotlb = (pd & ~TARGET_PAGE_MASK);
2227 if (p) {
2228 iotlb += p->region_offset;
2229 } else {
2230 iotlb += paddr;
2234 code_address = address;
2235 /* Make accesses to pages with watchpoints go via the
2236 watchpoint trap routines. */
2237 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2238 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2239 iotlb = io_mem_watch + paddr;
2240 /* TODO: The memory case can be optimized by not trapping
2241 reads of pages with a write breakpoint. */
2242 address |= TLB_MMIO;
2246 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2247 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2248 te = &env->tlb_table[mmu_idx][index];
2249 te->addend = addend - vaddr;
2250 if (prot & PAGE_READ) {
2251 te->addr_read = address;
2252 } else {
2253 te->addr_read = -1;
2256 if (prot & PAGE_EXEC) {
2257 te->addr_code = code_address;
2258 } else {
2259 te->addr_code = -1;
2261 if (prot & PAGE_WRITE) {
2262 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2263 (pd & IO_MEM_ROMD)) {
2264 /* Write access calls the I/O callback. */
2265 te->addr_write = address | TLB_MMIO;
2266 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2267 !cpu_physical_memory_is_dirty(pd)) {
2268 te->addr_write = address | TLB_NOTDIRTY;
2269 } else {
2270 te->addr_write = address;
2272 } else {
2273 te->addr_write = -1;
2277 #else
2279 void tlb_flush(CPUState *env, int flush_global)
2283 void tlb_flush_page(CPUState *env, target_ulong addr)
2288 * Walks guest process memory "regions" one by one
2289 * and calls callback function 'fn' for each region.
/* State carried through a walk_memory_regions() traversal:
   fn/priv - callback and its opaque argument,
   start   - start address of the region being accumulated, or -1ul
             when no region is open,
   prot    - page flags of that region. */
2292 struct walk_memory_regions_data
2294 walk_memory_regions_fn fn;
2295 void *priv;
2296 unsigned long start;
2297 int prot;
2300 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2301 abi_ulong end, int new_prot)
2303 if (data->start != -1ul) {
2304 int rc = data->fn(data->priv, data->start, end, data->prot);
2305 if (rc != 0) {
2306 return rc;
2310 data->start = (new_prot ? end : -1ul);
2311 data->prot = new_prot;
2313 return 0;
2316 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2317 abi_ulong base, int level, void **lp)
2319 abi_ulong pa;
2320 int i, rc;
2322 if (*lp == NULL) {
2323 return walk_memory_regions_end(data, base, 0);
2326 if (level == 0) {
2327 PageDesc *pd = *lp;
2328 for (i = 0; i < L2_SIZE; ++i) {
2329 int prot = pd[i].flags;
2331 pa = base | (i << TARGET_PAGE_BITS);
2332 if (prot != data->prot) {
2333 rc = walk_memory_regions_end(data, pa, prot);
2334 if (rc != 0) {
2335 return rc;
2339 } else {
2340 void **pp = *lp;
2341 for (i = 0; i < L2_SIZE; ++i) {
2342 pa = base | ((abi_ulong)i <<
2343 (TARGET_PAGE_BITS + L2_BITS * level));
2344 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2345 if (rc != 0) {
2346 return rc;
2351 return 0;
2354 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2356 struct walk_memory_regions_data data;
2357 unsigned long i;
2359 data.fn = fn;
2360 data.priv = priv;
2361 data.start = -1ul;
2362 data.prot = 0;
2364 for (i = 0; i < V_L1_SIZE; i++) {
2365 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2366 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2367 if (rc != 0) {
2368 return rc;
2372 return walk_memory_regions_end(&data, 0, 0);
2375 static int dump_region(void *priv, abi_ulong start,
2376 abi_ulong end, unsigned long prot)
2378 FILE *f = (FILE *)priv;
2380 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2381 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2382 start, end, end - start,
2383 ((prot & PAGE_READ) ? 'r' : '-'),
2384 ((prot & PAGE_WRITE) ? 'w' : '-'),
2385 ((prot & PAGE_EXEC) ? 'x' : '-'));
2387 return (0);
2390 /* dump memory mappings */
2391 void page_dump(FILE *f)
2393 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2394 "start", "end", "size", "prot");
2395 walk_memory_regions(f, dump_region);
2398 int page_get_flags(target_ulong address)
2400 PageDesc *p;
2402 p = page_find(address >> TARGET_PAGE_BITS);
2403 if (!p)
2404 return 0;
2405 return p->flags;
2408 /* Modify the flags of a page and invalidate the code if necessary.
2409 The flag PAGE_WRITE_ORG is positioned automatically depending
2410 on PAGE_WRITE. The mmap_lock should already be held. */
2411 void page_set_flags(target_ulong start, target_ulong end, int flags)
2413 target_ulong addr, len;
2415 /* This function should never be called with addresses outside the
2416 guest address space. If this assert fires, it probably indicates
2417 a missing call to h2g_valid. */
2418 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2419 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2420 #endif
2421 assert(start < end);
2423 start = start & TARGET_PAGE_MASK;
2424 end = TARGET_PAGE_ALIGN(end);
2426 if (flags & PAGE_WRITE) {
2427 flags |= PAGE_WRITE_ORG;
2430 for (addr = start, len = end - start;
2431 len != 0;
2432 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2433 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2435 /* If the write protection bit is set, then we invalidate
2436 the code inside. */
2437 if (!(p->flags & PAGE_WRITE) &&
2438 (flags & PAGE_WRITE) &&
2439 p->first_tb) {
2440 tb_invalidate_phys_page(addr, 0, NULL);
2442 p->flags = flags;
2446 int page_check_range(target_ulong start, target_ulong len, int flags)
2448 PageDesc *p;
2449 target_ulong end;
2450 target_ulong addr;
2452 /* This function should never be called with addresses outside the
2453 guest address space. If this assert fires, it probably indicates
2454 a missing call to h2g_valid. */
2455 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2456 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2457 #endif
2459 if (start + len - 1 < start) {
2460 /* We've wrapped around. */
2461 return -1;
2464 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2465 start = start & TARGET_PAGE_MASK;
2467 for (addr = start, len = end - start;
2468 len != 0;
2469 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2470 p = page_find(addr >> TARGET_PAGE_BITS);
2471 if( !p )
2472 return -1;
2473 if( !(p->flags & PAGE_VALID) )
2474 return -1;
2476 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2477 return -1;
2478 if (flags & PAGE_WRITE) {
2479 if (!(p->flags & PAGE_WRITE_ORG))
2480 return -1;
2481 /* unprotect the page if it was put read-only because it
2482 contains translated code */
2483 if (!(p->flags & PAGE_WRITE)) {
2484 if (!page_unprotect(addr, 0, NULL))
2485 return -1;
2487 return 0;
2490 return 0;
2493 /* called from signal handler: invalidate the code and unprotect the
2494 page. Return TRUE if the fault was successfully handled. */
2495 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2497 unsigned int prot;
2498 PageDesc *p;
2499 target_ulong host_start, host_end, addr;
2501 /* Technically this isn't safe inside a signal handler. However we
2502 know this only ever happens in a synchronous SEGV handler, so in
2503 practice it seems to be ok. */
2504 mmap_lock();
2506 p = page_find(address >> TARGET_PAGE_BITS);
2507 if (!p) {
2508 mmap_unlock();
2509 return 0;
2512 /* if the page was really writable, then we change its
2513 protection back to writable */
2514 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2515 host_start = address & qemu_host_page_mask;
2516 host_end = host_start + qemu_host_page_size;
2518 prot = 0;
2519 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2520 p = page_find(addr >> TARGET_PAGE_BITS);
2521 p->flags |= PAGE_WRITE;
2522 prot |= p->flags;
2524 /* and since the content will be modified, we must invalidate
2525 the corresponding translated code. */
2526 tb_invalidate_phys_page(addr, pc, puc);
2527 #ifdef DEBUG_TB_CHECK
2528 tb_invalidate_check(addr);
2529 #endif
2531 mprotect((void *)g2h(host_start), qemu_host_page_size,
2532 prot & PAGE_BITS);
2534 mmap_unlock();
2535 return 1;
2537 mmap_unlock();
2538 return 0;
2541 static inline void tlb_set_dirty(CPUState *env,
2542 unsigned long addr, target_ulong vaddr)
2545 #endif /* defined(CONFIG_USER_ONLY) */
2547 #if !defined(CONFIG_USER_ONLY)
2549 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2550 typedef struct subpage_t {
2551 target_phys_addr_t base;
2552 CPUReadMemoryFunc * const *mem_read[TARGET_PAGE_SIZE][4];
2553 CPUWriteMemoryFunc * const *mem_write[TARGET_PAGE_SIZE][4];
2554 void *opaque[TARGET_PAGE_SIZE][2][4];
2555 ram_addr_t region_offset[TARGET_PAGE_SIZE][2][4];
2556 } subpage_t;
2558 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2559 ram_addr_t memory, ram_addr_t region_offset);
2560 static void *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2561 ram_addr_t orig_memory, ram_addr_t region_offset);
/*
 * Work out which bytes of the target page at 'addr' are covered by the
 * region starting at 'start_addr'.
 *
 *   start_addr2  receives the first covered offset within the page
 *   end_addr2    receives the last covered offset within the page
 *   need_subpage is set to 1 when the page is only partially covered
 *                (it is never cleared here)
 *
 * NOTE: the region length is taken from a variable named 'orig_size' that
 * must be in scope where the macro is expanded.  'end_addr' is accepted
 * for the callers' sake but is not used.  Arguments are evaluated more
 * than once, so they must be free of side effects.
 */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                       \
    do {                                                                  \
        if ((addr) > (start_addr))                                        \
            (start_addr2) = 0;                                            \
        else {                                                            \
            (start_addr2) = (start_addr) & ~TARGET_PAGE_MASK;             \
            if ((start_addr2) > 0)                                        \
                (need_subpage) = 1;                                       \
        }                                                                 \
        if (((start_addr) + orig_size) - (addr) >= TARGET_PAGE_SIZE)      \
            (end_addr2) = TARGET_PAGE_SIZE - 1;                           \
        else {                                                            \
            (end_addr2) = ((start_addr) + orig_size - 1)                  \
                          & ~TARGET_PAGE_MASK;                            \
            if ((end_addr2) < TARGET_PAGE_SIZE - 1)                       \
                (need_subpage) = 1;                                       \
        }                                                                 \
    } while (0)
2582 /* register physical memory.
2583 For RAM, 'size' must be a multiple of the target page size.
2584 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2585 io memory page. The address used when calling the IO function is
2586 the offset from the start of the region, plus region_offset. Both
2587 start_addr and region_offset are rounded down to a page boundary
2588 before calculating this offset. This should not be a problem unless
2589 the low bits of start_addr and region_offset differ. */
2590 void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
2591 ram_addr_t size,
2592 ram_addr_t phys_offset,
2593 ram_addr_t region_offset)
2595 target_phys_addr_t addr, end_addr;
2596 PhysPageDesc *p;
2597 CPUState *env;
2598 ram_addr_t orig_size = size;
2599 void *subpage;
2601 cpu_notify_set_memory(start_addr, size, phys_offset);
2603 if (phys_offset == IO_MEM_UNASSIGNED) {
2604 region_offset = start_addr;
2606 region_offset &= TARGET_PAGE_MASK;
2607 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2608 end_addr = start_addr + (target_phys_addr_t)size;
2609 for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
2610 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2611 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2612 ram_addr_t orig_memory = p->phys_offset;
2613 target_phys_addr_t start_addr2, end_addr2;
2614 int need_subpage = 0;
2616 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2617 need_subpage);
2618 if (need_subpage || phys_offset & IO_MEM_SUBWIDTH) {
2619 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2620 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2621 &p->phys_offset, orig_memory,
2622 p->region_offset);
2623 } else {
2624 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2625 >> IO_MEM_SHIFT];
2627 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2628 region_offset);
2629 p->region_offset = 0;
2630 } else {
2631 p->phys_offset = phys_offset;
2632 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2633 (phys_offset & IO_MEM_ROMD))
2634 phys_offset += TARGET_PAGE_SIZE;
2636 } else {
2637 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2638 p->phys_offset = phys_offset;
2639 p->region_offset = region_offset;
2640 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2641 (phys_offset & IO_MEM_ROMD)) {
2642 phys_offset += TARGET_PAGE_SIZE;
2643 } else {
2644 target_phys_addr_t start_addr2, end_addr2;
2645 int need_subpage = 0;
2647 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2648 end_addr2, need_subpage);
2650 if (need_subpage || phys_offset & IO_MEM_SUBWIDTH) {
2651 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2652 &p->phys_offset, IO_MEM_UNASSIGNED,
2653 addr & TARGET_PAGE_MASK);
2654 subpage_register(subpage, start_addr2, end_addr2,
2655 phys_offset, region_offset);
2656 p->region_offset = 0;
2660 region_offset += TARGET_PAGE_SIZE;
2663 /* since each CPU stores ram addresses in its TLB cache, we must
2664 reset the modified entries */
2665 /* XXX: slow ! */
2666 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2667 tlb_flush(env, 1);
2671 /* XXX: temporary until new memory mapping API */
2672 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2674 PhysPageDesc *p;
2676 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2677 if (!p)
2678 return IO_MEM_UNASSIGNED;
2679 return p->phys_offset;
2682 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2684 if (kvm_enabled())
2685 kvm_coalesce_mmio_region(addr, size);
2688 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2690 if (kvm_enabled())
2691 kvm_uncoalesce_mmio_region(addr, size);
/* Forward the request to KVM when KVM is enabled; otherwise do nothing. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2700 #if defined(__linux__) && !defined(TARGET_S390X)
2702 #include <sys/vfs.h>
#define HUGETLBFS_MAGIC       0x958458f6

/*
 * Return the block size of the filesystem holding 'path', as reported by
 * statfs().  A warning is printed when that filesystem is not hugetlbfs,
 * but its block size is still returned.
 *
 * Returns 0 (after reporting through perror) if statfs() fails.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    /* Retry for as long as the call is interrupted by a signal. */
    for (;;) {
        ret = statfs(path, &fs);
        if (ret == 0 || errno != EINTR) {
            break;
        }
    }

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
2726 static void *file_ram_alloc(ram_addr_t memory, const char *path)
2728 char *filename;
2729 void *area;
2730 int fd;
2731 #ifdef MAP_POPULATE
2732 int flags;
2733 #endif
2734 unsigned long hpagesize;
2736 hpagesize = gethugepagesize(path);
2737 if (!hpagesize) {
2738 return NULL;
2741 if (memory < hpagesize) {
2742 return NULL;
2745 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2746 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2747 return NULL;
2750 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2751 return NULL;
2754 fd = mkstemp(filename);
2755 if (fd < 0) {
2756 perror("unable to create backing store for hugepages");
2757 free(filename);
2758 return NULL;
2760 unlink(filename);
2761 free(filename);
2763 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2766 * ftruncate is not supported by hugetlbfs in older
2767 * hosts, so don't bother bailing out on errors.
2768 * If anything goes wrong with it under other filesystems,
2769 * mmap will fail.
2771 if (ftruncate(fd, memory))
2772 perror("ftruncate");
2774 #ifdef MAP_POPULATE
2775 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2776 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2777 * to sidestep this quirk.
2779 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2780 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2781 #else
2782 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2783 #endif
2784 if (area == MAP_FAILED) {
2785 perror("file_ram_alloc: can't mmap RAM pages");
2786 close(fd);
2787 return (NULL);
2789 return area;
2791 #endif
2793 ram_addr_t qemu_ram_alloc(ram_addr_t size)
2795 RAMBlock *new_block;
2797 size = TARGET_PAGE_ALIGN(size);
2798 new_block = qemu_malloc(sizeof(*new_block));
2800 if (mem_path) {
2801 #if defined (__linux__) && !defined(TARGET_S390X)
2802 new_block->host = file_ram_alloc(size, mem_path);
2803 if (!new_block->host)
2804 exit(1);
2805 #else
2806 fprintf(stderr, "-mem-path option unsupported\n");
2807 exit(1);
2808 #endif
2809 } else {
2810 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2811 /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
2812 new_block->host = mmap((void*)0x1000000, size,
2813 PROT_EXEC|PROT_READ|PROT_WRITE,
2814 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
2815 #else
2816 new_block->host = qemu_vmalloc(size);
2817 #endif
2818 #ifdef MADV_MERGEABLE
2819 madvise(new_block->host, size, MADV_MERGEABLE);
2820 #endif
2822 new_block->offset = last_ram_offset;
2823 new_block->length = size;
2825 new_block->next = ram_blocks;
2826 ram_blocks = new_block;
2828 phys_ram_dirty = qemu_realloc(phys_ram_dirty,
2829 (last_ram_offset + size) >> TARGET_PAGE_BITS);
2830 memset(phys_ram_dirty + (last_ram_offset >> TARGET_PAGE_BITS),
2831 0xff, size >> TARGET_PAGE_BITS);
2833 last_ram_offset += size;
2835 if (kvm_enabled())
2836 kvm_setup_guest_memory(new_block->host, size);
2838 return new_block->offset;
2841 void qemu_ram_free(ram_addr_t addr)
2843 /* TODO: implement this. */
2846 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2847 With the exception of the softmmu code in this file, this should
2848 only be used for local memory (e.g. video ram) that the device owns,
2849 and knows it isn't going to access beyond the end of the block.
2851 It should not be used for general purpose DMA.
2852 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2854 void *qemu_get_ram_ptr(ram_addr_t addr)
2856 RAMBlock *prev;
2857 RAMBlock **prevp;
2858 RAMBlock *block;
2860 prev = NULL;
2861 prevp = &ram_blocks;
2862 block = ram_blocks;
2863 while (block && (block->offset > addr
2864 || block->offset + block->length <= addr)) {
2865 if (prev)
2866 prevp = &prev->next;
2867 prev = block;
2868 block = block->next;
2870 if (!block) {
2871 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2872 abort();
2874 /* Move this entry to to start of the list. */
2875 if (prev) {
2876 prev->next = block->next;
2877 block->next = *prevp;
2878 *prevp = block;
2880 return block->host + (addr - block->offset);
2883 /* Some of the softmmu routines need to translate from a host pointer
2884 (typically a TLB entry) back to a ram offset. */
2885 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2887 RAMBlock *prev;
2888 RAMBlock *block;
2889 uint8_t *host = ptr;
2891 prev = NULL;
2892 block = ram_blocks;
2893 while (block && (block->host > host
2894 || block->host + block->length <= host)) {
2895 prev = block;
2896 block = block->next;
2898 if (!block) {
2899 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2900 abort();
2902 return block->offset + (host - block->host);
2905 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
2907 #ifdef DEBUG_UNASSIGNED
2908 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2909 #endif
2910 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2911 do_unassigned_access(addr, 0, 0, 0, 1);
2912 #endif
2913 return 0;
2916 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
2918 #ifdef DEBUG_UNASSIGNED
2919 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2920 #endif
2921 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2922 do_unassigned_access(addr, 0, 0, 0, 2);
2923 #endif
2924 return 0;
2927 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
2929 #ifdef DEBUG_UNASSIGNED
2930 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2931 #endif
2932 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2933 do_unassigned_access(addr, 0, 0, 0, 4);
2934 #endif
2935 return 0;
2938 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
2940 #ifdef DEBUG_UNASSIGNED
2941 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2942 #endif
2943 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2944 do_unassigned_access(addr, 1, 0, 0, 1);
2945 #endif
2948 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
2950 #ifdef DEBUG_UNASSIGNED
2951 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2952 #endif
2953 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2954 do_unassigned_access(addr, 1, 0, 0, 2);
2955 #endif
2958 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
2960 #ifdef DEBUG_UNASSIGNED
2961 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2962 #endif
2963 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2964 do_unassigned_access(addr, 1, 0, 0, 4);
2965 #endif
2968 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
2969 unassigned_mem_readb,
2970 unassigned_mem_readw,
2971 unassigned_mem_readl,
2974 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
2975 unassigned_mem_writeb,
2976 unassigned_mem_writew,
2977 unassigned_mem_writel,
2980 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
2981 uint32_t val)
2983 int dirty_flags;
2984 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2985 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2986 #if !defined(CONFIG_USER_ONLY)
2987 tb_invalidate_phys_page_fast(ram_addr, 1);
2988 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2989 #endif
2991 stb_p(qemu_get_ram_ptr(ram_addr), val);
2992 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2993 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2994 /* we remove the notdirty callback only if the code has been
2995 flushed */
2996 if (dirty_flags == 0xff)
2997 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3000 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3001 uint32_t val)
3003 int dirty_flags;
3004 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3005 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3006 #if !defined(CONFIG_USER_ONLY)
3007 tb_invalidate_phys_page_fast(ram_addr, 2);
3008 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3009 #endif
3011 stw_p(qemu_get_ram_ptr(ram_addr), val);
3012 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3013 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3014 /* we remove the notdirty callback only if the code has been
3015 flushed */
3016 if (dirty_flags == 0xff)
3017 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3020 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3021 uint32_t val)
3023 int dirty_flags;
3024 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3025 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3026 #if !defined(CONFIG_USER_ONLY)
3027 tb_invalidate_phys_page_fast(ram_addr, 4);
3028 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3029 #endif
3031 stl_p(qemu_get_ram_ptr(ram_addr), val);
3032 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3033 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3034 /* we remove the notdirty callback only if the code has been
3035 flushed */
3036 if (dirty_flags == 0xff)
3037 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3040 static CPUReadMemoryFunc * const error_mem_read[3] = {
3041 NULL, /* never used */
3042 NULL, /* never used */
3043 NULL, /* never used */
3046 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3047 notdirty_mem_writeb,
3048 notdirty_mem_writew,
3049 notdirty_mem_writel,
3052 /* Generate a debug exception if a watchpoint has been hit. */
3053 static void check_watchpoint(int offset, int len_mask, int flags)
3055 CPUState *env = cpu_single_env;
3056 target_ulong pc, cs_base;
3057 TranslationBlock *tb;
3058 target_ulong vaddr;
3059 CPUWatchpoint *wp;
3060 int cpu_flags;
3062 if (env->watchpoint_hit) {
3063 /* We re-entered the check after replacing the TB. Now raise
3064 * the debug interrupt so that is will trigger after the
3065 * current instruction. */
3066 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3067 return;
3069 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3070 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3071 if ((vaddr == (wp->vaddr & len_mask) ||
3072 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3073 wp->flags |= BP_WATCHPOINT_HIT;
3074 if (!env->watchpoint_hit) {
3075 env->watchpoint_hit = wp;
3076 tb = tb_find_pc(env->mem_io_pc);
3077 if (!tb) {
3078 cpu_abort(env, "check_watchpoint: could not find TB for "
3079 "pc=%p", (void *)env->mem_io_pc);
3081 cpu_restore_state(tb, env, env->mem_io_pc, NULL);
3082 tb_phys_invalidate(tb, -1);
3083 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3084 env->exception_index = EXCP_DEBUG;
3085 } else {
3086 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3087 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3089 cpu_resume_from_signal(env, NULL);
3091 } else {
3092 wp->flags &= ~BP_WATCHPOINT_HIT;
3097 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3098 so these check for a hit then pass through to the normal out-of-line
3099 phys routines. */
3100 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3102 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3103 return ldub_phys(addr);
3106 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3108 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3109 return lduw_phys(addr);
3112 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3114 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3115 return ldl_phys(addr);
3118 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3119 uint32_t val)
3121 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3122 stb_phys(addr, val);
3125 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3126 uint32_t val)
3128 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3129 stw_phys(addr, val);
3132 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3133 uint32_t val)
3135 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3136 stl_phys(addr, val);
3139 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3140 watch_mem_readb,
3141 watch_mem_readw,
3142 watch_mem_readl,
3145 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3146 watch_mem_writeb,
3147 watch_mem_writew,
3148 watch_mem_writel,
3151 static inline uint32_t subpage_readlen (subpage_t *mmio, target_phys_addr_t addr,
3152 unsigned int len)
3154 uint32_t ret;
3155 unsigned int idx;
3157 idx = SUBPAGE_IDX(addr);
3158 #if defined(DEBUG_SUBPAGE)
3159 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3160 mmio, len, addr, idx);
3161 #endif
3162 ret = (**mmio->mem_read[idx][len])(mmio->opaque[idx][0][len],
3163 addr + mmio->region_offset[idx][0][len]);
3165 return ret;
3168 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3169 uint32_t value, unsigned int len)
3171 unsigned int idx;
3173 idx = SUBPAGE_IDX(addr);
3174 #if defined(DEBUG_SUBPAGE)
3175 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n", __func__,
3176 mmio, len, addr, idx, value);
3177 #endif
3178 (**mmio->mem_write[idx][len])(mmio->opaque[idx][1][len],
3179 addr + mmio->region_offset[idx][1][len],
3180 value);
3183 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3185 #if defined(DEBUG_SUBPAGE)
3186 printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
3187 #endif
3189 return subpage_readlen(opaque, addr, 0);
3192 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3193 uint32_t value)
3195 #if defined(DEBUG_SUBPAGE)
3196 printf("%s: addr " TARGET_FMT_plx " val %08x\n", __func__, addr, value);
3197 #endif
3198 subpage_writelen(opaque, addr, value, 0);
3201 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3203 #if defined(DEBUG_SUBPAGE)
3204 printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
3205 #endif
3207 return subpage_readlen(opaque, addr, 1);
3210 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3211 uint32_t value)
3213 #if defined(DEBUG_SUBPAGE)
3214 printf("%s: addr " TARGET_FMT_plx " val %08x\n", __func__, addr, value);
3215 #endif
3216 subpage_writelen(opaque, addr, value, 1);
3219 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3221 #if defined(DEBUG_SUBPAGE)
3222 printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
3223 #endif
3225 return subpage_readlen(opaque, addr, 2);
3228 static void subpage_writel (void *opaque,
3229 target_phys_addr_t addr, uint32_t value)
3231 #if defined(DEBUG_SUBPAGE)
3232 printf("%s: addr " TARGET_FMT_plx " val %08x\n", __func__, addr, value);
3233 #endif
3234 subpage_writelen(opaque, addr, value, 2);
3237 static CPUReadMemoryFunc * const subpage_read[] = {
3238 &subpage_readb,
3239 &subpage_readw,
3240 &subpage_readl,
3243 static CPUWriteMemoryFunc * const subpage_write[] = {
3244 &subpage_writeb,
3245 &subpage_writew,
3246 &subpage_writel,
3249 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3250 ram_addr_t memory, ram_addr_t region_offset)
3252 int idx, eidx;
3253 unsigned int i;
3255 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3256 return -1;
3257 idx = SUBPAGE_IDX(start);
3258 eidx = SUBPAGE_IDX(end);
3259 #if defined(DEBUG_SUBPAGE)
3260 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3261 mmio, start, end, idx, eidx, memory);
3262 #endif
3263 memory >>= IO_MEM_SHIFT;
3264 for (; idx <= eidx; idx++) {
3265 for (i = 0; i < 4; i++) {
3266 if (io_mem_read[memory][i]) {
3267 mmio->mem_read[idx][i] = &io_mem_read[memory][i];
3268 mmio->opaque[idx][0][i] = io_mem_opaque[memory];
3269 mmio->region_offset[idx][0][i] = region_offset;
3271 if (io_mem_write[memory][i]) {
3272 mmio->mem_write[idx][i] = &io_mem_write[memory][i];
3273 mmio->opaque[idx][1][i] = io_mem_opaque[memory];
3274 mmio->region_offset[idx][1][i] = region_offset;
3279 return 0;
3282 static void *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3283 ram_addr_t orig_memory, ram_addr_t region_offset)
3285 subpage_t *mmio;
3286 int subpage_memory;
3288 mmio = qemu_mallocz(sizeof(subpage_t));
3290 mmio->base = base;
3291 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio);
3292 #if defined(DEBUG_SUBPAGE)
3293 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3294 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3295 #endif
3296 *phys = subpage_memory | IO_MEM_SUBPAGE;
3297 subpage_register(mmio, 0, TARGET_PAGE_SIZE - 1, orig_memory,
3298 region_offset);
3300 return mmio;
3303 static int get_free_io_mem_idx(void)
3305 int i;
3307 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3308 if (!io_mem_used[i]) {
3309 io_mem_used[i] = 1;
3310 return i;
3312 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3313 return -1;
3316 /* mem_read and mem_write are arrays of functions containing the
3317 function to access byte (index 0), word (index 1) and dword (index
3318 2). Functions can be omitted with a NULL function pointer.
3319 If io_index is non zero, the corresponding io zone is
3320 modified. If it is zero, a new io zone is allocated. The return
3321 value can be used with cpu_register_physical_memory(). (-1) is
3322 returned if error. */
3323 static int cpu_register_io_memory_fixed(int io_index,
3324 CPUReadMemoryFunc * const *mem_read,
3325 CPUWriteMemoryFunc * const *mem_write,
3326 void *opaque)
3328 int i, subwidth = 0;
3330 if (io_index <= 0) {
3331 io_index = get_free_io_mem_idx();
3332 if (io_index == -1)
3333 return io_index;
3334 } else {
3335 io_index >>= IO_MEM_SHIFT;
3336 if (io_index >= IO_MEM_NB_ENTRIES)
3337 return -1;
3340 for(i = 0;i < 3; i++) {
3341 if (!mem_read[i] || !mem_write[i])
3342 subwidth = IO_MEM_SUBWIDTH;
3343 io_mem_read[io_index][i] = mem_read[i];
3344 io_mem_write[io_index][i] = mem_write[i];
3346 io_mem_opaque[io_index] = opaque;
3347 return (io_index << IO_MEM_SHIFT) | subwidth;
3350 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3351 CPUWriteMemoryFunc * const *mem_write,
3352 void *opaque)
3354 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque);
3357 void cpu_unregister_io_memory(int io_table_address)
3359 int i;
3360 int io_index = io_table_address >> IO_MEM_SHIFT;
3362 for (i=0;i < 3; i++) {
3363 io_mem_read[io_index][i] = unassigned_mem_read[i];
3364 io_mem_write[io_index][i] = unassigned_mem_write[i];
3366 io_mem_opaque[io_index] = NULL;
3367 io_mem_used[io_index] = 0;
3370 static void io_mem_init(void)
3372 int i;
3374 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read, unassigned_mem_write, NULL);
3375 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read, unassigned_mem_write, NULL);
3376 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, notdirty_mem_write, NULL);
3377 for (i=0; i<5; i++)
3378 io_mem_used[i] = 1;
3380 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3381 watch_mem_write, NULL);
3384 #endif /* !defined(CONFIG_USER_ONLY) */
3386 /* physical memory access (slow version, mainly for debug) */
3387 #if defined(CONFIG_USER_ONLY)
3388 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3389 uint8_t *buf, int len, int is_write)
3391 int l, flags;
3392 target_ulong page;
3393 void * p;
3395 while (len > 0) {
3396 page = addr & TARGET_PAGE_MASK;
3397 l = (page + TARGET_PAGE_SIZE) - addr;
3398 if (l > len)
3399 l = len;
3400 flags = page_get_flags(page);
3401 if (!(flags & PAGE_VALID))
3402 return -1;
3403 if (is_write) {
3404 if (!(flags & PAGE_WRITE))
3405 return -1;
3406 /* XXX: this code should not depend on lock_user */
3407 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3408 return -1;
3409 memcpy(p, buf, l);
3410 unlock_user(p, addr, l);
3411 } else {
3412 if (!(flags & PAGE_READ))
3413 return -1;
3414 /* XXX: this code should not depend on lock_user */
3415 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3416 return -1;
3417 memcpy(buf, p, l);
3418 unlock_user(p, addr, 0);
3420 len -= l;
3421 buf += l;
3422 addr += l;
3424 return 0;
3427 #else
3428 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3429 int len, int is_write)
3431 int l, io_index;
3432 uint8_t *ptr;
3433 uint32_t val;
3434 target_phys_addr_t page;
3435 unsigned long pd;
3436 PhysPageDesc *p;
3438 while (len > 0) {
3439 page = addr & TARGET_PAGE_MASK;
3440 l = (page + TARGET_PAGE_SIZE) - addr;
3441 if (l > len)
3442 l = len;
3443 p = phys_page_find(page >> TARGET_PAGE_BITS);
3444 if (!p) {
3445 pd = IO_MEM_UNASSIGNED;
3446 } else {
3447 pd = p->phys_offset;
3450 if (is_write) {
3451 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3452 target_phys_addr_t addr1 = addr;
3453 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3454 if (p)
3455 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3456 /* XXX: could force cpu_single_env to NULL to avoid
3457 potential bugs */
3458 if (l >= 4 && ((addr1 & 3) == 0)) {
3459 /* 32 bit write access */
3460 val = ldl_p(buf);
3461 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3462 l = 4;
3463 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3464 /* 16 bit write access */
3465 val = lduw_p(buf);
3466 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3467 l = 2;
3468 } else {
3469 /* 8 bit write access */
3470 val = ldub_p(buf);
3471 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3472 l = 1;
3474 } else {
3475 unsigned long addr1;
3476 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3477 /* RAM case */
3478 ptr = qemu_get_ram_ptr(addr1);
3479 memcpy(ptr, buf, l);
3480 if (!cpu_physical_memory_is_dirty(addr1)) {
3481 /* invalidate code */
3482 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3483 /* set dirty bit */
3484 cpu_physical_memory_set_dirty_flags(
3485 addr1, (0xff & ~CODE_DIRTY_FLAG));
3488 } else {
3489 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3490 !(pd & IO_MEM_ROMD)) {
3491 target_phys_addr_t addr1 = addr;
3492 /* I/O case */
3493 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3494 if (p)
3495 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3496 if (l >= 4 && ((addr1 & 3) == 0)) {
3497 /* 32 bit read access */
3498 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3499 stl_p(buf, val);
3500 l = 4;
3501 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3502 /* 16 bit read access */
3503 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3504 stw_p(buf, val);
3505 l = 2;
3506 } else {
3507 /* 8 bit read access */
3508 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3509 stb_p(buf, val);
3510 l = 1;
3512 } else {
3513 /* RAM case */
3514 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3515 (addr & ~TARGET_PAGE_MASK);
3516 memcpy(buf, ptr, l);
3519 len -= l;
3520 buf += l;
3521 addr += l;
3525 /* used for ROM loading : can write in RAM and ROM */
3526 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3527 const uint8_t *buf, int len)
3529 int l;
3530 uint8_t *ptr;
3531 target_phys_addr_t page;
3532 unsigned long pd;
3533 PhysPageDesc *p;
3535 while (len > 0) {
3536 page = addr & TARGET_PAGE_MASK;
3537 l = (page + TARGET_PAGE_SIZE) - addr;
3538 if (l > len)
3539 l = len;
3540 p = phys_page_find(page >> TARGET_PAGE_BITS);
3541 if (!p) {
3542 pd = IO_MEM_UNASSIGNED;
3543 } else {
3544 pd = p->phys_offset;
3547 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3548 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3549 !(pd & IO_MEM_ROMD)) {
3550 /* do nothing */
3551 } else {
3552 unsigned long addr1;
3553 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3554 /* ROM/RAM case */
3555 ptr = qemu_get_ram_ptr(addr1);
3556 memcpy(ptr, buf, l);
3558 len -= l;
3559 buf += l;
3560 addr += l;
3564 typedef struct {
3565 void *buffer;
3566 target_phys_addr_t addr;
3567 target_phys_addr_t len;
3568 } BounceBuffer;
3570 static BounceBuffer bounce;
3572 typedef struct MapClient {
3573 void *opaque;
3574 void (*callback)(void *opaque);
3575 QLIST_ENTRY(MapClient) link;
3576 } MapClient;
3578 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3579 = QLIST_HEAD_INITIALIZER(map_client_list);
3581 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3583 MapClient *client = qemu_malloc(sizeof(*client));
3585 client->opaque = opaque;
3586 client->callback = callback;
3587 QLIST_INSERT_HEAD(&map_client_list, client, link);
3588 return client;
3591 void cpu_unregister_map_client(void *_client)
3593 MapClient *client = (MapClient *)_client;
3595 QLIST_REMOVE(client, link);
3596 qemu_free(client);
3599 static void cpu_notify_map_clients(void)
3601 MapClient *client;
3603 while (!QLIST_EMPTY(&map_client_list)) {
3604 client = QLIST_FIRST(&map_client_list);
3605 client->callback(client->opaque);
3606 cpu_unregister_map_client(client);
3610 /* Map a physical memory region into a host virtual address.
3611 * May map a subset of the requested range, given by and returned in *plen.
3612 * May return NULL if resources needed to perform the mapping are exhausted.
3613 * Use only for reads OR writes - not for read-modify-write operations.
3614 * Use cpu_register_map_client() to know when retrying the map operation is
3615 * likely to succeed.
3617 void *cpu_physical_memory_map(target_phys_addr_t addr,
3618 target_phys_addr_t *plen,
3619 int is_write)
3621 target_phys_addr_t len = *plen;
3622 target_phys_addr_t done = 0;
3623 int l;
3624 uint8_t *ret = NULL;
3625 uint8_t *ptr;
3626 target_phys_addr_t page;
3627 unsigned long pd;
3628 PhysPageDesc *p;
3629 unsigned long addr1;
3631 while (len > 0) {
3632 page = addr & TARGET_PAGE_MASK;
3633 l = (page + TARGET_PAGE_SIZE) - addr;
3634 if (l > len)
3635 l = len;
3636 p = phys_page_find(page >> TARGET_PAGE_BITS);
3637 if (!p) {
3638 pd = IO_MEM_UNASSIGNED;
3639 } else {
3640 pd = p->phys_offset;
3643 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3644 if (done || bounce.buffer) {
3645 break;
3647 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3648 bounce.addr = addr;
3649 bounce.len = l;
3650 if (!is_write) {
3651 cpu_physical_memory_rw(addr, bounce.buffer, l, 0);
3653 ptr = bounce.buffer;
3654 } else {
3655 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3656 ptr = qemu_get_ram_ptr(addr1);
3658 if (!done) {
3659 ret = ptr;
3660 } else if (ret + done != ptr) {
3661 break;
3664 len -= l;
3665 addr += l;
3666 done += l;
3668 *plen = done;
3669 return ret;
3672 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3673 * Will also mark the memory as dirty if is_write == 1. access_len gives
3674 * the amount of memory that was actually read or written by the caller.
3676 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3677 int is_write, target_phys_addr_t access_len)
3679 if (buffer != bounce.buffer) {
3680 if (is_write) {
3681 ram_addr_t addr1 = qemu_ram_addr_from_host(buffer);
3682 while (access_len) {
3683 unsigned l;
3684 l = TARGET_PAGE_SIZE;
3685 if (l > access_len)
3686 l = access_len;
3687 if (!cpu_physical_memory_is_dirty(addr1)) {
3688 /* invalidate code */
3689 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3690 /* set dirty bit */
3691 cpu_physical_memory_set_dirty_flags(
3692 addr1, (0xff & ~CODE_DIRTY_FLAG));
3694 addr1 += l;
3695 access_len -= l;
3698 return;
3700 if (is_write) {
3701 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3703 qemu_vfree(bounce.buffer);
3704 bounce.buffer = NULL;
3705 cpu_notify_map_clients();
3708 /* warning: addr must be aligned */
3709 uint32_t ldl_phys(target_phys_addr_t addr)
3711 int io_index;
3712 uint8_t *ptr;
3713 uint32_t val;
3714 unsigned long pd;
3715 PhysPageDesc *p;
3717 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3718 if (!p) {
3719 pd = IO_MEM_UNASSIGNED;
3720 } else {
3721 pd = p->phys_offset;
3724 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3725 !(pd & IO_MEM_ROMD)) {
3726 /* I/O case */
3727 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3728 if (p)
3729 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3730 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3731 } else {
3732 /* RAM case */
3733 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3734 (addr & ~TARGET_PAGE_MASK);
3735 val = ldl_p(ptr);
3737 return val;
3740 /* warning: addr must be aligned */
3741 uint64_t ldq_phys(target_phys_addr_t addr)
3743 int io_index;
3744 uint8_t *ptr;
3745 uint64_t val;
3746 unsigned long pd;
3747 PhysPageDesc *p;
3749 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3750 if (!p) {
3751 pd = IO_MEM_UNASSIGNED;
3752 } else {
3753 pd = p->phys_offset;
3756 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3757 !(pd & IO_MEM_ROMD)) {
3758 /* I/O case */
3759 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3760 if (p)
3761 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3762 #ifdef TARGET_WORDS_BIGENDIAN
3763 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
3764 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
3765 #else
3766 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3767 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
3768 #endif
3769 } else {
3770 /* RAM case */
3771 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3772 (addr & ~TARGET_PAGE_MASK);
3773 val = ldq_p(ptr);
3775 return val;
3778 /* XXX: optimize */
3779 uint32_t ldub_phys(target_phys_addr_t addr)
3781 uint8_t val;
3782 cpu_physical_memory_read(addr, &val, 1);
3783 return val;
3786 /* XXX: optimize */
3787 uint32_t lduw_phys(target_phys_addr_t addr)
3789 uint16_t val;
3790 cpu_physical_memory_read(addr, (uint8_t *)&val, 2);
3791 return tswap16(val);
3794 /* warning: addr must be aligned. The ram page is not masked as dirty
3795 and the code inside is not invalidated. It is useful if the dirty
3796 bits are used to track modified PTEs */
3797 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3799 int io_index;
3800 uint8_t *ptr;
3801 unsigned long pd;
3802 PhysPageDesc *p;
3804 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3805 if (!p) {
3806 pd = IO_MEM_UNASSIGNED;
3807 } else {
3808 pd = p->phys_offset;
3811 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3812 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3813 if (p)
3814 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3815 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3816 } else {
3817 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3818 ptr = qemu_get_ram_ptr(addr1);
3819 stl_p(ptr, val);
3821 if (unlikely(in_migration)) {
3822 if (!cpu_physical_memory_is_dirty(addr1)) {
3823 /* invalidate code */
3824 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3825 /* set dirty bit */
3826 cpu_physical_memory_set_dirty_flags(
3827 addr1, (0xff & ~CODE_DIRTY_FLAG));
3833 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3835 int io_index;
3836 uint8_t *ptr;
3837 unsigned long pd;
3838 PhysPageDesc *p;
3840 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3841 if (!p) {
3842 pd = IO_MEM_UNASSIGNED;
3843 } else {
3844 pd = p->phys_offset;
3847 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3848 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3849 if (p)
3850 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3851 #ifdef TARGET_WORDS_BIGENDIAN
3852 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
3853 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
3854 #else
3855 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3856 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
3857 #endif
3858 } else {
3859 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3860 (addr & ~TARGET_PAGE_MASK);
3861 stq_p(ptr, val);
3865 /* warning: addr must be aligned */
3866 void stl_phys(target_phys_addr_t addr, uint32_t val)
3868 int io_index;
3869 uint8_t *ptr;
3870 unsigned long pd;
3871 PhysPageDesc *p;
3873 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3874 if (!p) {
3875 pd = IO_MEM_UNASSIGNED;
3876 } else {
3877 pd = p->phys_offset;
3880 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3881 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3882 if (p)
3883 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3884 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3885 } else {
3886 unsigned long addr1;
3887 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3888 /* RAM case */
3889 ptr = qemu_get_ram_ptr(addr1);
3890 stl_p(ptr, val);
3891 if (!cpu_physical_memory_is_dirty(addr1)) {
3892 /* invalidate code */
3893 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3894 /* set dirty bit */
3895 cpu_physical_memory_set_dirty_flags(addr1,
3896 (0xff & ~CODE_DIRTY_FLAG));
3901 /* XXX: optimize */
3902 void stb_phys(target_phys_addr_t addr, uint32_t val)
3904 uint8_t v = val;
3905 cpu_physical_memory_write(addr, &v, 1);
3908 /* XXX: optimize */
3909 void stw_phys(target_phys_addr_t addr, uint32_t val)
3911 uint16_t v = tswap16(val);
3912 cpu_physical_memory_write(addr, (const uint8_t *)&v, 2);
3915 /* XXX: optimize */
3916 void stq_phys(target_phys_addr_t addr, uint64_t val)
3918 val = tswap64(val);
3919 cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
3922 /* virtual memory access for debug (includes writing to ROM) */
3923 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3924 uint8_t *buf, int len, int is_write)
3926 int l;
3927 target_phys_addr_t phys_addr;
3928 target_ulong page;
3930 while (len > 0) {
3931 page = addr & TARGET_PAGE_MASK;
3932 phys_addr = cpu_get_phys_page_debug(env, page);
3933 /* if no physical page mapped, return an error */
3934 if (phys_addr == -1)
3935 return -1;
3936 l = (page + TARGET_PAGE_SIZE) - addr;
3937 if (l > len)
3938 l = len;
3939 phys_addr += (addr & ~TARGET_PAGE_MASK);
3940 if (is_write)
3941 cpu_physical_memory_write_rom(phys_addr, buf, l);
3942 else
3943 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
3944 len -= l;
3945 buf += l;
3946 addr += l;
3948 return 0;
3950 #endif
3952 /* in deterministic execution mode, instructions doing device I/Os
3953 must be at the end of the TB */
3954 void cpu_io_recompile(CPUState *env, void *retaddr)
3956 TranslationBlock *tb;
3957 uint32_t n, cflags;
3958 target_ulong pc, cs_base;
3959 uint64_t flags;
3961 tb = tb_find_pc((unsigned long)retaddr);
3962 if (!tb) {
3963 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
3964 retaddr);
3966 n = env->icount_decr.u16.low + tb->icount;
3967 cpu_restore_state(tb, env, (unsigned long)retaddr, NULL);
3968 /* Calculate how many instructions had been executed before the fault
3969 occurred. */
3970 n = n - env->icount_decr.u16.low;
3971 /* Generate a new TB ending on the I/O insn. */
3972 n++;
3973 /* On MIPS and SH, delay slot instructions can only be restarted if
3974 they were already the first instruction in the TB. If this is not
3975 the first instruction in a TB then re-execute the preceding
3976 branch. */
3977 #if defined(TARGET_MIPS)
3978 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
3979 env->active_tc.PC -= 4;
3980 env->icount_decr.u16.low++;
3981 env->hflags &= ~MIPS_HFLAG_BMASK;
3983 #elif defined(TARGET_SH4)
3984 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
3985 && n > 1) {
3986 env->pc -= 2;
3987 env->icount_decr.u16.low++;
3988 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
3990 #endif
3991 /* This should never happen. */
3992 if (n > CF_COUNT_MASK)
3993 cpu_abort(env, "TB too big during recompile");
3995 cflags = n | CF_LAST_IO;
3996 pc = tb->pc;
3997 cs_base = tb->cs_base;
3998 flags = tb->flags;
3999 tb_phys_invalidate(tb, -1);
4000 /* FIXME: In theory this could raise an exception. In practice
4001 we have already translated the block once so it's probably ok. */
4002 tb_gen_code(env, pc, cs_base, flags, cflags);
4003 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4004 the first in the TB) then we end up generating a whole new TB and
4005 repeating the fault, which is horribly inefficient.
4006 Better would be to execute just this insn uncached, or generate a
4007 second new TB. */
4008 cpu_resume_from_signal(env, NULL);
4011 #if !defined(CONFIG_USER_ONLY)
4013 void dump_exec_info(FILE *f,
4014 int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
4016 int i, target_code_size, max_target_code_size;
4017 int direct_jmp_count, direct_jmp2_count, cross_page;
4018 TranslationBlock *tb;
4020 target_code_size = 0;
4021 max_target_code_size = 0;
4022 cross_page = 0;
4023 direct_jmp_count = 0;
4024 direct_jmp2_count = 0;
4025 for(i = 0; i < nb_tbs; i++) {
4026 tb = &tbs[i];
4027 target_code_size += tb->size;
4028 if (tb->size > max_target_code_size)
4029 max_target_code_size = tb->size;
4030 if (tb->page_addr[1] != -1)
4031 cross_page++;
4032 if (tb->tb_next_offset[0] != 0xffff) {
4033 direct_jmp_count++;
4034 if (tb->tb_next_offset[1] != 0xffff) {
4035 direct_jmp2_count++;
4039 /* XXX: avoid using doubles ? */
4040 cpu_fprintf(f, "Translation buffer state:\n");
4041 cpu_fprintf(f, "gen code size %ld/%ld\n",
4042 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4043 cpu_fprintf(f, "TB count %d/%d\n",
4044 nb_tbs, code_gen_max_blocks);
4045 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4046 nb_tbs ? target_code_size / nb_tbs : 0,
4047 max_target_code_size);
4048 cpu_fprintf(f, "TB avg host size %d bytes (expansion ratio: %0.1f)\n",
4049 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4050 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4051 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4052 cross_page,
4053 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4054 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4055 direct_jmp_count,
4056 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4057 direct_jmp2_count,
4058 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4059 cpu_fprintf(f, "\nStatistics:\n");
4060 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4061 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4062 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4063 tcg_dump_info(f, cpu_fprintf);
4066 #define MMUSUFFIX _cmmu
4067 #define GETPC() NULL
4068 #define env cpu_single_env
4069 #define SOFTMMU_CODE_ACCESS
4071 #define SHIFT 0
4072 #include "softmmu_template.h"
4074 #define SHIFT 1
4075 #include "softmmu_template.h"
4077 #define SHIFT 2
4078 #include "softmmu_template.h"
4080 #define SHIFT 3
4081 #include "softmmu_template.h"
4083 #undef env
4085 #endif