qcow2: Remove request from in-flight list after error
[qemu-kvm.git] / exec.c
bloba6d3bad7472c294587dd70d761f36f5d0b194122
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <unistd.h>
32 #include <inttypes.h>
34 #include "cpu.h"
35 #include "exec-all.h"
36 #include "qemu-common.h"
37 #include "tcg.h"
38 #include "hw/hw.h"
39 #include "osdep.h"
40 #include "kvm.h"
41 #include "qemu-timer.h"
42 #if defined(CONFIG_USER_ONLY)
43 #include <qemu.h>
44 #include <signal.h>
45 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
46 #include <sys/param.h>
47 #if __FreeBSD_version >= 700104
48 #define HAVE_KINFO_GETVMMAP
49 #define sigqueue sigqueue_freebsd /* avoid redefinition */
50 #include <sys/time.h>
51 #include <sys/proc.h>
52 #include <machine/profile.h>
53 #define _KERNEL
54 #include <sys/user.h>
55 #undef _KERNEL
56 #undef sigqueue
57 #include <libutil.h>
58 #endif
59 #endif
60 #endif
62 //#define DEBUG_TB_INVALIDATE
63 //#define DEBUG_FLUSH
64 //#define DEBUG_TLB
65 //#define DEBUG_UNASSIGNED
67 /* make various TB consistency checks */
68 //#define DEBUG_TB_CHECK
69 //#define DEBUG_TLB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 #if defined(__arm__) || defined(__sparc_v9__)
89 /* The prologue must be reachable with a direct jump. ARM and Sparc64
90 have limited branch ranges (possibly also PPC) so place it in a
91 section close to code segment. */
92 #define code_gen_section \
93 __attribute__((__section__(".gen_code"))) \
94 __attribute__((aligned (32)))
95 #elif defined(_WIN32)
96 /* Maximum alignment for Win32 is 16. */
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 uint8_t *code_gen_ptr;
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 uint8_t *phys_ram_dirty;
114 static int in_migration;
/* One node of the singly linked list rooted at 'ram_blocks' and chained
   through 'next'.
   NOTE(review): presumably 'host' is the host pointer backing the
   ram_addr_t range that starts at 'offset' and is 'length' bytes long;
   the code that fills these fields is not visible here -- confirm. */
116 typedef struct RAMBlock {
117 uint8_t *host;
118 ram_addr_t offset;
119 ram_addr_t length;
120 struct RAMBlock *next;
121 } RAMBlock;
123 static RAMBlock *ram_blocks;
124 /* TODO: When we implement (and use) ram deallocation (e.g. for hotplug)
125 then we can no longer assume contiguous ram offsets, and external uses
126 of this variable will break. */
127 ram_addr_t last_ram_offset;
128 #endif
130 CPUState *first_cpu;
131 /* current CPU in the current thread. It is only valid inside
132 cpu_exec() */
133 CPUState *cpu_single_env;
134 /* 0 = Do not count executed instructions.
135 1 = Precise instruction counting.
136 2 = Adaptive rate instruction counting. */
137 int use_icount = 0;
138 /* Current instruction counter. While executing translated code this may
139 include some instructions that have not yet been executed. */
140 int64_t qemu_icount;
142 typedef struct PageDesc {
143 /* list of TBs intersecting this ram page */
144 TranslationBlock *first_tb;
145 /* in order to optimize self modifying code, we count the number
146 of lookups we do to a given page to use a bitmap */
147 unsigned int code_write_count;
148 uint8_t *code_bitmap;
149 #if defined(CONFIG_USER_ONLY)
150 unsigned long flags;
151 #endif
152 } PageDesc;
154 /* In system mode we want L1_MAP to be based on ram offsets,
155 while in user mode we want it to be based on virtual addresses. */
156 #if !defined(CONFIG_USER_ONLY)
157 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
158 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
159 #else
160 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
161 #endif
162 #else
163 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
164 #endif
166 /* Size of the L2 (and L3, etc) page tables. */
167 #define L2_BITS 10
168 #define L2_SIZE (1 << L2_BITS)
170 /* The bits remaining after N lower levels of page tables. */
171 #define P_L1_BITS_REM \
172 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
173 #define V_L1_BITS_REM \
174 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
176 /* Size of the L1 page table. Avoid silly small sizes. */
177 #if P_L1_BITS_REM < 4
178 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
179 #else
180 #define P_L1_BITS P_L1_BITS_REM
181 #endif
183 #if V_L1_BITS_REM < 4
184 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
185 #else
186 #define V_L1_BITS V_L1_BITS_REM
187 #endif
189 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
190 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
192 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
193 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
195 unsigned long qemu_real_host_page_size;
196 unsigned long qemu_host_page_bits;
197 unsigned long qemu_host_page_size;
198 unsigned long qemu_host_page_mask;
200 /* This is a multi-level map on the virtual address space.
201 The bottom level has pointers to PageDesc. */
202 static void *l1_map[V_L1_SIZE];
204 #if !defined(CONFIG_USER_ONLY)
205 typedef struct PhysPageDesc {
206 /* offset in host memory of the page + io_index in the low bits */
207 ram_addr_t phys_offset;
208 ram_addr_t region_offset;
209 } PhysPageDesc;
211 /* This is a multi-level map on the physical address space.
212 The bottom level has pointers to PhysPageDesc. */
213 static void *l1_phys_map[P_L1_SIZE];
215 static void io_mem_init(void);
217 /* io memory support */
218 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
219 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
220 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
221 static char io_mem_used[IO_MEM_NB_ENTRIES];
222 static int io_mem_watch;
223 #endif
225 /* log support */
226 #ifdef WIN32
227 static const char *logfilename = "qemu.log";
228 #else
229 static const char *logfilename = "/tmp/qemu.log";
230 #endif
231 FILE *logfile;
232 int loglevel;
233 static int log_append = 0;
235 /* statistics */
236 #if !defined(CONFIG_USER_ONLY)
237 static int tlb_flush_count;
238 #endif
239 static int tb_flush_count;
240 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Ask the OS to make [addr, addr + size) readable, writable and
   executable.  The result of VirtualProtect() is not examined. */
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous_protection);
}
#else
/* Ask the OS to make every host page overlapping [addr, addr + size)
   readable, writable and executable.  The range is widened to page
   boundaries first; the result of mprotect() is not examined. */
static void map_exec(void *addr, long size)
{
    unsigned long page_size = getpagesize();
    unsigned long first_page = (unsigned long)addr;
    unsigned long past_last_page = (unsigned long)addr + size;

    /* round the start down and the end up to a page boundary */
    first_page &= ~(page_size - 1);
    past_last_page += page_size - 1;
    past_last_page &= ~(page_size - 1);

    mprotect((void *)first_page, past_last_page - first_page,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
/* Initialise the host page size globals: qemu_real_host_page_size,
   qemu_host_page_size, qemu_host_page_bits and qemu_host_page_mask.
   In non-Windows user-mode builds, additionally enumerate the mappings
   already present in this process (kinfo_getvmmap() when
   HAVE_KINFO_GETVMMAP is defined, /proc/self/maps otherwise) and pass
   every range whose start is a valid guest address to page_set_flags()
   with PAGE_RESERVED. */
268 static void page_init(void)
270 /* NOTE: we can always suppose that qemu_host_page_size >=
271 TARGET_PAGE_SIZE */
272 #ifdef _WIN32
274 SYSTEM_INFO system_info;
276 GetSystemInfo(&system_info);
277 qemu_real_host_page_size = system_info.dwPageSize;
279 #else
280 qemu_real_host_page_size = getpagesize();
281 #endif
/* A preset (non-zero) qemu_host_page_size is kept, but never allowed to
   be smaller than TARGET_PAGE_SIZE. */
282 if (qemu_host_page_size == 0)
283 qemu_host_page_size = qemu_real_host_page_size;
284 if (qemu_host_page_size < TARGET_PAGE_SIZE)
285 qemu_host_page_size = TARGET_PAGE_SIZE;
/* Smallest shift count for which (1 << bits) >= qemu_host_page_size. */
286 qemu_host_page_bits = 0;
287 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
288 qemu_host_page_bits++;
289 qemu_host_page_mask = ~(qemu_host_page_size - 1);
291 #if !defined(_WIN32) && defined(CONFIG_USER_ONLY)
293 #ifdef HAVE_KINFO_GETVMMAP
294 struct kinfo_vmentry *freep;
295 int i, cnt;
297 freep = kinfo_getvmmap(getpid(), &cnt);
298 if (freep) {
299 mmap_lock();
300 for (i = 0; i < cnt; i++) {
301 unsigned long startaddr, endaddr;
303 startaddr = freep[i].kve_start;
304 endaddr = freep[i].kve_end;
305 if (h2g_valid(startaddr)) {
306 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
/* An end address outside the guest range is clamped to the top of the
   guest address space. */
308 if (h2g_valid(endaddr)) {
309 endaddr = h2g(endaddr);
310 } else {
311 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
312 endaddr = ~0ul;
313 #else
314 endaddr = ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS) - 1;
315 #endif
317 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
320 free(freep);
321 mmap_unlock();
323 #else
324 FILE *f;
326 last_brk = (unsigned long)sbrk(0);
328 f = fopen("/proc/self/maps", "r");
329 if (f) {
330 mmap_lock();
/* NOTE(review): this loop terminates only when feof(f) becomes true; if
   fscanf() ever failed to match a line without consuming it the loop
   would not advance -- confirm that cannot happen for /proc/self/maps. */
332 do {
333 unsigned long startaddr, endaddr;
334 int n;
336 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
338 if (n == 2 && h2g_valid(startaddr)) {
339 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
341 if (h2g_valid(endaddr)) {
342 endaddr = h2g(endaddr);
343 } else {
344 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
345 endaddr = ~0ul;
346 #else
347 endaddr = ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS) - 1;
348 #endif
350 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
352 } while (!feof(f));
354 fclose(f);
355 mmap_unlock();
357 #endif
359 #endif
/* Walk the multi-level table rooted at l1_map for page number 'index'
   and return a pointer to its PageDesc.
   When 'alloc' is zero the function returns NULL as soon as it meets a
   table that has not been allocated yet; otherwise missing tables are
   created with ALLOC() (mmap in user mode, qemu_mallocz otherwise). */
362 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
364 PageDesc *pd;
365 void **lp;
366 int i;
368 #if defined(CONFIG_USER_ONLY)
369 /* We can't use qemu_malloc because it may recurse into a locked mutex.
370 Neither can we record the new pages we reserve while allocating a
371 given page because that may recurse into an unallocated page table
372 entry. Stuff the allocations we do make into a queue and process
373 them after having completed one entire page table allocation. */
/* 'reserve' holds (guest address, size) pairs, two slots per allocation. */
375 unsigned long reserve[2 * (V_L1_SHIFT / L2_BITS)];
376 int reserve_idx = 0;
/* NOTE(review): the mmap() result is not compared against MAP_FAILED
   before being stored in the table -- confirm this is acceptable. */
378 # define ALLOC(P, SIZE) \
379 do { \
380 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
381 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
382 if (h2g_valid(P)) { \
383 reserve[reserve_idx] = h2g(P); \
384 reserve[reserve_idx + 1] = SIZE; \
385 reserve_idx += 2; \
387 } while (0)
388 #else
389 # define ALLOC(P, SIZE) \
390 do { P = qemu_mallocz(SIZE); } while (0)
391 #endif
393 /* Level 1. Always allocated. */
394 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
396 /* Level 2..N-1. */
397 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
398 void **p = *lp;
400 if (p == NULL) {
401 if (!alloc) {
402 return NULL;
404 ALLOC(p, sizeof(void *) * L2_SIZE);
405 *lp = p;
408 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
/* Bottom level: an array of L2_SIZE PageDesc entries. */
411 pd = *lp;
412 if (pd == NULL) {
413 if (!alloc) {
414 return NULL;
416 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
417 *lp = pd;
420 #undef ALLOC
421 #if defined(CONFIG_USER_ONLY)
/* The walk is complete, so the regions queued by ALLOC() can now be
   flagged PAGE_RESERVED. */
422 for (i = 0; i < reserve_idx; i += 2) {
423 unsigned long addr = reserve[i];
424 unsigned long len = reserve[i + 1];
426 page_set_flags(addr & TARGET_PAGE_MASK,
427 TARGET_PAGE_ALIGN(addr + len),
428 PAGE_RESERVED);
430 #endif
432 return pd + (index & (L2_SIZE - 1));
435 static inline PageDesc *page_find(tb_page_addr_t index)
437 return page_find_alloc(index, 0);
440 #if !defined(CONFIG_USER_ONLY)
441 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
443 PhysPageDesc *pd;
444 void **lp;
445 int i;
447 /* Level 1. Always allocated. */
448 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
450 /* Level 2..N-1. */
451 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
452 void **p = *lp;
453 if (p == NULL) {
454 if (!alloc) {
455 return NULL;
457 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
459 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
462 pd = *lp;
463 if (pd == NULL) {
464 int i;
466 if (!alloc) {
467 return NULL;
470 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
472 for (i = 0; i < L2_SIZE; i++) {
473 pd[i].phys_offset = IO_MEM_UNASSIGNED;
474 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
478 return pd + (index & (L2_SIZE - 1));
481 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
483 return phys_page_find_alloc(index, 0);
486 static void tlb_protect_code(ram_addr_t ram_addr);
487 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
488 target_ulong vaddr);
489 #define mmap_lock() do { } while(0)
490 #define mmap_unlock() do { } while(0)
491 #endif
493 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
495 #if defined(CONFIG_USER_ONLY)
496 /* Currently it is not recommended to allocate big chunks of data in
497 user mode. It will change when a dedicated libc will be used */
498 #define USE_STATIC_CODE_GEN_BUFFER
499 #endif
501 #ifdef USE_STATIC_CODE_GEN_BUFFER
502 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
503 __attribute__((aligned (CODE_GEN_ALIGN)));
504 #endif
/* Set up code_gen_buffer / code_gen_buffer_size, make the buffer and
   code_gen_prologue executable, and allocate the 'tbs' array.
   With USE_STATIC_CODE_GEN_BUFFER the static buffer is used and
   'tb_size' is ignored.  Otherwise 'tb_size' is the requested size in
   bytes; zero selects a default, and the result is raised to at least
   MIN_CODE_GEN_BUFFER_SIZE and capped per host as coded below.
   Exits the process if the mmap() of the buffer fails. */
506 static void code_gen_alloc(unsigned long tb_size)
508 #ifdef USE_STATIC_CODE_GEN_BUFFER
509 code_gen_buffer = static_code_gen_buffer;
510 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
511 map_exec(code_gen_buffer, code_gen_buffer_size);
512 #else
513 code_gen_buffer_size = tb_size;
514 if (code_gen_buffer_size == 0) {
515 #if defined(CONFIG_USER_ONLY)
516 /* in user mode, phys_ram_size is not meaningful */
517 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
518 #else
519 /* XXX: needs adjustments */
520 code_gen_buffer_size = (unsigned long)(ram_size / 4);
521 #endif
523 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
524 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
525 /* The code gen buffer location may have constraints depending on
526 the host cpu and OS */
527 #if defined(__linux__)
529 int flags;
530 void *start = NULL;
532 flags = MAP_PRIVATE | MAP_ANONYMOUS;
533 #if defined(__x86_64__)
534 flags |= MAP_32BIT;
535 /* Cannot map more than that */
536 if (code_gen_buffer_size > (800 * 1024 * 1024))
537 code_gen_buffer_size = (800 * 1024 * 1024);
538 #elif defined(__sparc_v9__)
539 // Map the buffer below 2G, so we can use direct calls and branches
540 flags |= MAP_FIXED;
541 start = (void *) 0x60000000UL;
542 if (code_gen_buffer_size > (512 * 1024 * 1024))
543 code_gen_buffer_size = (512 * 1024 * 1024);
544 #elif defined(__arm__)
545 /* Map the buffer below 32M, so we can use direct calls and branches */
546 flags |= MAP_FIXED;
547 start = (void *) 0x01000000UL;
548 if (code_gen_buffer_size > 16 * 1024 * 1024)
549 code_gen_buffer_size = 16 * 1024 * 1024;
550 #endif
/* The buffer is mapped readable, writable and executable in one step,
   so no separate map_exec() call is made on this path. */
551 code_gen_buffer = mmap(start, code_gen_buffer_size,
552 PROT_WRITE | PROT_READ | PROT_EXEC,
553 flags, -1, 0);
554 if (code_gen_buffer == MAP_FAILED) {
555 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
556 exit(1);
559 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
561 int flags;
562 void *addr = NULL;
563 flags = MAP_PRIVATE | MAP_ANONYMOUS;
564 #if defined(__x86_64__)
565 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
566 * 0x40000000 is free */
567 flags |= MAP_FIXED;
568 addr = (void *)0x40000000;
569 /* Cannot map more than that */
570 if (code_gen_buffer_size > (800 * 1024 * 1024))
571 code_gen_buffer_size = (800 * 1024 * 1024);
572 #endif
573 code_gen_buffer = mmap(addr, code_gen_buffer_size,
574 PROT_WRITE | PROT_READ | PROT_EXEC,
575 flags, -1, 0);
576 if (code_gen_buffer == MAP_FAILED) {
577 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
578 exit(1);
581 #else
/* Other hosts: allocate from the heap, then change the protection. */
582 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
583 map_exec(code_gen_buffer, code_gen_buffer_size);
584 #endif
585 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
586 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
/* The flush threshold is the buffer size minus the value returned by
   code_gen_max_block_size(); the TB array is sized from
   CODE_GEN_AVG_BLOCK_SIZE. */
587 code_gen_buffer_max_size = code_gen_buffer_size -
588 code_gen_max_block_size();
589 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
590 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
593 /* Must be called before using the QEMU cpus. 'tb_size' is the size
594 (in bytes) allocated to the translation buffer. Zero means default
595 size. */
596 void cpu_exec_init_all(unsigned long tb_size)
598 cpu_gen_init();
599 code_gen_alloc(tb_size);
600 code_gen_ptr = code_gen_buffer;
601 page_init();
602 #if !defined(CONFIG_USER_ONLY)
603 io_mem_init();
604 #endif
607 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
609 static int cpu_common_post_load(void *opaque, int version_id)
611 CPUState *env = opaque;
613 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
614 version_id is increased. */
615 env->interrupt_request &= ~0x01;
616 tlb_flush(env, 1);
618 return 0;
/* Save/restore description for the CPUState fields 'halted' and
   'interrupt_request'; cpu_common_post_load() runs after they are
   loaded.  cpu_exec_init() passes it to vmstate_register(). */
621 static const VMStateDescription vmstate_cpu_common = {
622 .name = "cpu_common",
623 .version_id = 1,
624 .minimum_version_id = 1,
625 .minimum_version_id_old = 1,
626 .post_load = cpu_common_post_load,
627 .fields = (VMStateField []) {
628 VMSTATE_UINT32(halted, CPUState),
629 VMSTATE_UINT32(interrupt_request, CPUState),
630 VMSTATE_END_OF_LIST()
633 #endif
635 CPUState *qemu_get_cpu(int cpu)
637 CPUState *env = first_cpu;
639 while (env) {
640 if (env->cpu_index == cpu)
641 break;
642 env = env->next_cpu;
645 return env;
648 void cpu_exec_init(CPUState *env)
650 CPUState **penv;
651 int cpu_index;
653 #if defined(CONFIG_USER_ONLY)
654 cpu_list_lock();
655 #endif
656 env->next_cpu = NULL;
657 penv = &first_cpu;
658 cpu_index = 0;
659 while (*penv != NULL) {
660 penv = &(*penv)->next_cpu;
661 cpu_index++;
663 env->cpu_index = cpu_index;
664 env->numa_node = 0;
665 QTAILQ_INIT(&env->breakpoints);
666 QTAILQ_INIT(&env->watchpoints);
667 *penv = env;
668 #if defined(CONFIG_USER_ONLY)
669 cpu_list_unlock();
670 #endif
671 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
672 vmstate_register(cpu_index, &vmstate_cpu_common, env);
673 register_savevm("cpu", cpu_index, CPU_SAVE_VERSION,
674 cpu_save, cpu_load, env);
675 #endif
678 static inline void invalidate_page_bitmap(PageDesc *p)
680 if (p->code_bitmap) {
681 qemu_free(p->code_bitmap);
682 p->code_bitmap = NULL;
684 p->code_write_count = 0;
687 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
689 static void page_flush_tb_1 (int level, void **lp)
691 int i;
693 if (*lp == NULL) {
694 return;
696 if (level == 0) {
697 PageDesc *pd = *lp;
698 for (i = 0; i < L2_SIZE; ++i) {
699 pd[i].first_tb = NULL;
700 invalidate_page_bitmap(pd + i);
702 } else {
703 void **pp = *lp;
704 for (i = 0; i < L2_SIZE; ++i) {
705 page_flush_tb_1 (level - 1, pp + i);
710 static void page_flush_tb(void)
712 int i;
713 for (i = 0; i < V_L1_SIZE; i++) {
714 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
718 /* flush all the translation blocks */
719 /* XXX: tb_flush is currently not thread safe */
720 void tb_flush(CPUState *env1)
722 CPUState *env;
723 #if defined(DEBUG_FLUSH)
724 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
725 (unsigned long)(code_gen_ptr - code_gen_buffer),
726 nb_tbs, nb_tbs > 0 ?
727 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
728 #endif
729 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
730 cpu_abort(env1, "Internal error: code buffer overflow\n");
732 nb_tbs = 0;
734 for(env = first_cpu; env != NULL; env = env->next_cpu) {
735 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
738 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
739 page_flush_tb();
741 code_gen_ptr = code_gen_buffer;
742 /* XXX: flush processor icache at this point if cache flush is
743 expensive */
744 tb_flush_count++;
747 #ifdef DEBUG_TB_CHECK
749 static void tb_invalidate_check(target_ulong address)
751 TranslationBlock *tb;
752 int i;
753 address &= TARGET_PAGE_MASK;
754 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
755 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
756 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
757 address >= tb->pc + tb->size)) {
758 printf("ERROR invalidate: address=" TARGET_FMT_lx
759 " PC=%08lx size=%04x\n",
760 address, (long)tb->pc, tb->size);
766 /* verify that all the pages have correct rights for code */
767 static void tb_page_check(void)
769 TranslationBlock *tb;
770 int i, flags1, flags2;
772 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
773 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
774 flags1 = page_get_flags(tb->pc);
775 flags2 = page_get_flags(tb->pc + tb->size - 1);
776 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
777 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
778 (long)tb->pc, tb->size, flags1, flags2);
784 #endif
786 /* invalidate one TB */
787 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
788 int next_offset)
790 TranslationBlock *tb1;
791 for(;;) {
792 tb1 = *ptb;
793 if (tb1 == tb) {
794 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
795 break;
797 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
801 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
803 TranslationBlock *tb1;
804 unsigned int n1;
806 for(;;) {
807 tb1 = *ptb;
808 n1 = (long)tb1 & 3;
809 tb1 = (TranslationBlock *)((long)tb1 & ~3);
810 if (tb1 == tb) {
811 *ptb = tb1->page_next[n1];
812 break;
814 ptb = &tb1->page_next[n1];
818 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
820 TranslationBlock *tb1, **ptb;
821 unsigned int n1;
823 ptb = &tb->jmp_next[n];
824 tb1 = *ptb;
825 if (tb1) {
826 /* find tb(n) in circular list */
827 for(;;) {
828 tb1 = *ptb;
829 n1 = (long)tb1 & 3;
830 tb1 = (TranslationBlock *)((long)tb1 & ~3);
831 if (n1 == n && tb1 == tb)
832 break;
833 if (n1 == 2) {
834 ptb = &tb1->jmp_first;
835 } else {
836 ptb = &tb1->jmp_next[n1];
839 /* now we can suppress tb(n) from the list */
840 *ptb = tb->jmp_next[n];
842 tb->jmp_next[n] = NULL;
846 /* reset the jump entry 'n' of a TB so that it is not chained to
847 another TB */
848 static inline void tb_reset_jump(TranslationBlock *tb, int n)
850 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
/* Invalidate one TB: unlink it from the physical hash table, from the
   TB lists of the page(s) it occupies, from every CPU's tb_jmp_cache,
   and from all jump chains leading into or out of it.
   'page_addr' names a page whose TB list must be left untouched (the
   caller handles that list itself); pass -1 to unlink from both pages. */
853 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
855 CPUState *env;
856 PageDesc *p;
857 unsigned int h, n1;
858 tb_page_addr_t phys_pc;
859 TranslationBlock *tb1, *tb2;
861 /* remove the TB from the hash list */
862 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
863 h = tb_phys_hash_func(phys_pc);
864 tb_remove(&tb_phys_hash[h], tb,
865 offsetof(TranslationBlock, phys_hash_next));
867 /* remove the TB from the page list */
/* NOTE(review): the page_find() results below are dereferenced without
   a NULL check -- presumably a linked TB always has its PageDesc
   allocated; confirm. */
868 if (tb->page_addr[0] != page_addr) {
869 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
870 tb_page_remove(&p->first_tb, tb);
871 invalidate_page_bitmap(p);
/* page_addr[1] == -1 means the TB does not extend onto a second page. */
873 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
874 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
875 tb_page_remove(&p->first_tb, tb);
876 invalidate_page_bitmap(p);
879 tb_invalidated_flag = 1;
881 /* remove the TB from every CPU's tb_jmp_cache */
882 h = tb_jmp_cache_hash_func(tb->pc);
883 for(env = first_cpu; env != NULL; env = env->next_cpu) {
884 if (env->tb_jmp_cache[h] == tb)
885 env->tb_jmp_cache[h] = NULL;
888 /* suppress this TB from the two jump lists */
889 tb_jmp_remove(tb, 0);
890 tb_jmp_remove(tb, 1);
892 /* suppress any remaining jumps to this TB */
/* Walk the list of TBs that jump here; a link tagged 2 in its low bits
   marks the end of that list.
   NOTE(review): the tag is extracted via (long), which is narrower than
   a pointer on LLP64 hosts -- confirm whether such hosts matter here. */
893 tb1 = tb->jmp_first;
894 for(;;) {
895 n1 = (long)tb1 & 3;
896 if (n1 == 2)
897 break;
898 tb1 = (TranslationBlock *)((long)tb1 & ~3);
899 tb2 = tb1->jmp_next[n1];
900 tb_reset_jump(tb1, n1);
901 tb1->jmp_next[n1] = NULL;
902 tb1 = tb2;
904 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
906 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab', where bit k lives
   in byte k / 8 at bit position k % 8.  Bits outside the range are left
   as they were. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    const int last_byte_start = end & ~7;
    uint8_t *byte = tab + (start >> 3);
    int head_mask = 0xff << (start & 7);
    int pos;

    if ((start & ~7) == last_byte_start) {
        /* the whole range falls inside a single byte */
        if (start < end) {
            head_mask &= ~(0xff << (end & 7));
            *byte |= head_mask;
        }
        return;
    }

    /* leading partial byte */
    *byte++ |= head_mask;

    /* whole bytes in the middle */
    for (pos = (start + 8) & ~7; pos < last_byte_start; pos += 8) {
        *byte++ = 0xff;
    }

    /* trailing partial byte, if any */
    if (pos < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
936 static void build_page_bitmap(PageDesc *p)
938 int n, tb_start, tb_end;
939 TranslationBlock *tb;
941 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
943 tb = p->first_tb;
944 while (tb != NULL) {
945 n = (long)tb & 3;
946 tb = (TranslationBlock *)((long)tb & ~3);
947 /* NOTE: this is subtle as a TB may span two physical pages */
948 if (n == 0) {
949 /* NOTE: tb_end may be after the end of the page, but
950 it is not a problem */
951 tb_start = tb->pc & ~TARGET_PAGE_MASK;
952 tb_end = tb_start + tb->size;
953 if (tb_end > TARGET_PAGE_SIZE)
954 tb_end = TARGET_PAGE_SIZE;
955 } else {
956 tb_start = 0;
957 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
959 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
960 tb = tb->page_next[n];
964 TranslationBlock *tb_gen_code(CPUState *env,
965 target_ulong pc, target_ulong cs_base,
966 int flags, int cflags)
968 TranslationBlock *tb;
969 uint8_t *tc_ptr;
970 tb_page_addr_t phys_pc, phys_page2;
971 target_ulong virt_page2;
972 int code_gen_size;
974 phys_pc = get_page_addr_code(env, pc);
975 tb = tb_alloc(pc);
976 if (!tb) {
977 /* flush must be done */
978 tb_flush(env);
979 /* cannot fail at this point */
980 tb = tb_alloc(pc);
981 /* Don't forget to invalidate previous TB info. */
982 tb_invalidated_flag = 1;
984 tc_ptr = code_gen_ptr;
985 tb->tc_ptr = tc_ptr;
986 tb->cs_base = cs_base;
987 tb->flags = flags;
988 tb->cflags = cflags;
989 cpu_gen_code(env, tb, &code_gen_size);
990 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
992 /* check next page if needed */
993 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
994 phys_page2 = -1;
995 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
996 phys_page2 = get_page_addr_code(env, virt_page2);
998 tb_link_page(tb, phys_pc, phys_page2);
999 return tb;
1002 /* invalidate all TBs which intersect with the target physical page
1003 starting in range [start;end[. NOTE: start and end must refer to
1004 the same physical page. 'is_cpu_write_access' should be true if called
1005 from a real cpu write access: the virtual CPU will exit the current
1006 TB if code is modified inside this TB. */
1007 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1008 int is_cpu_write_access)
1010 TranslationBlock *tb, *tb_next, *saved_tb;
1011 CPUState *env = cpu_single_env;
1012 tb_page_addr_t tb_start, tb_end;
1013 PageDesc *p;
1014 int n;
1015 #ifdef TARGET_HAS_PRECISE_SMC
1016 int current_tb_not_found = is_cpu_write_access;
1017 TranslationBlock *current_tb = NULL;
1018 int current_tb_modified = 0;
1019 target_ulong current_pc = 0;
1020 target_ulong current_cs_base = 0;
1021 int current_flags = 0;
1022 #endif /* TARGET_HAS_PRECISE_SMC */
/* Nothing to do when no PageDesc exists for this page. */
1024 p = page_find(start >> TARGET_PAGE_BITS);
1025 if (!p)
1026 return;
/* While the page has no bitmap, code_write_count is bumped on every
   call; the bitmap is built once the count reaches
   SMC_BITMAP_USE_THRESHOLD on a call made for a CPU write access. */
1027 if (!p->code_bitmap &&
1028 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1029 is_cpu_write_access) {
1030 /* build code bitmap */
1031 build_page_bitmap(p);
1034 /* we remove all the TBs in the range [start, end[ */
1035 /* XXX: see if in some cases it could be faster to invalidate all the code */
/* List links carry, in their two low bits, the page_next[] slot that
   continues the list.  tb_next is fetched before the TB is invalidated
   because tb_phys_invalidate(tb, -1) unlinks it from this page's list.
   NOTE(review): the tag is extracted via (long), which is narrower than
   a pointer on LLP64 hosts -- confirm whether such hosts matter here. */
1036 tb = p->first_tb;
1037 while (tb != NULL) {
1038 n = (long)tb & 3;
1039 tb = (TranslationBlock *)((long)tb & ~3);
1040 tb_next = tb->page_next[n];
1041 /* NOTE: this is subtle as a TB may span two physical pages */
1042 if (n == 0) {
1043 /* NOTE: tb_end may be after the end of the page, but
1044 it is not a problem */
1045 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1046 tb_end = tb_start + tb->size;
1047 } else {
1048 tb_start = tb->page_addr[1];
1049 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
/* Only TBs whose physical range overlaps [start, end[ are invalidated. */
1051 if (!(tb_end <= start || tb_start >= end)) {
1052 #ifdef TARGET_HAS_PRECISE_SMC
/* The TB containing env->mem_io_pc is looked up at most once, on the
   first overlapping TB. */
1053 if (current_tb_not_found) {
1054 current_tb_not_found = 0;
1055 current_tb = NULL;
1056 if (env->mem_io_pc) {
1057 /* now we have a real cpu fault */
1058 current_tb = tb_find_pc(env->mem_io_pc);
1061 if (current_tb == tb &&
1062 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1063 /* If we are modifying the current TB, we must stop
1064 its execution. We could be more precise by checking
1065 that the modification is after the current PC, but it
1066 would require a specialized function to partially
1067 restore the CPU state */
1069 current_tb_modified = 1;
1070 cpu_restore_state(current_tb, env,
1071 env->mem_io_pc, NULL);
1072 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1073 &current_flags);
1075 #endif /* TARGET_HAS_PRECISE_SMC */
1076 /* we need to do that to handle the case where a signal
1077 occurs while doing tb_phys_invalidate() */
1078 saved_tb = NULL;
1079 if (env) {
1080 saved_tb = env->current_tb;
1081 env->current_tb = NULL;
1083 tb_phys_invalidate(tb, -1);
1084 if (env) {
1085 env->current_tb = saved_tb;
/* Re-raise any interrupt request that is pending now that current_tb
   has been restored. */
1086 if (env->interrupt_request && env->current_tb)
1087 cpu_interrupt(env, env->interrupt_request);
1090 tb = tb_next;
1092 #if !defined(CONFIG_USER_ONLY)
1093 /* if no code remaining, no need to continue to use slow writes */
/* NOTE(review): 'env' is dereferenced here without a NULL check when
   is_cpu_write_access is set -- presumably cpu_single_env is always
   valid for real CPU writes; confirm. */
1094 if (!p->first_tb) {
1095 invalidate_page_bitmap(p);
1096 if (is_cpu_write_access) {
1097 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1100 #endif
1101 #ifdef TARGET_HAS_PRECISE_SMC
1102 if (current_tb_modified) {
1103 /* we generate a block containing just the instruction
1104 modifying the memory. It will ensure that it cannot modify
1105 itself */
1106 env->current_tb = NULL;
1107 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1108 cpu_resume_from_signal(env, NULL);
1110 #endif
1113 /* len must be <= 8 and start must be a multiple of len */
1114 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1116 PageDesc *p;
1117 int offset, b;
1118 #if 0
1119 if (1) {
1120 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1121 cpu_single_env->mem_io_vaddr, len,
1122 cpu_single_env->eip,
1123 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1125 #endif
1126 p = page_find(start >> TARGET_PAGE_BITS);
1127 if (!p)
1128 return;
1129 if (p->code_bitmap) {
1130 offset = start & ~TARGET_PAGE_MASK;
1131 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1132 if (b & ((1 << len) - 1))
1133 goto do_invalidate;
1134 } else {
1135 do_invalidate:
1136 tb_invalidate_phys_page_range(start, start + len, 1);
1140 #if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB on the TB list of the page containing 'addr' and
   then empty that list.  Returns immediately when the page has no
   PageDesc.
   With TARGET_HAS_PRECISE_SMC, a non-zero 'pc' is used to find the TB
   currently executing; if that TB is among those invalidated, the CPU
   state is restored from (pc, puc), a single-instruction TB is
   generated and execution resumes through cpu_resume_from_signal(). */
1141 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1142 unsigned long pc, void *puc)
1144 TranslationBlock *tb;
1145 PageDesc *p;
1146 int n;
1147 #ifdef TARGET_HAS_PRECISE_SMC
1148 TranslationBlock *current_tb = NULL;
1149 CPUState *env = cpu_single_env;
1150 int current_tb_modified = 0;
1151 target_ulong current_pc = 0;
1152 target_ulong current_cs_base = 0;
1153 int current_flags = 0;
1154 #endif
1156 addr &= TARGET_PAGE_MASK;
1157 p = page_find(addr >> TARGET_PAGE_BITS);
1158 if (!p)
1159 return;
1160 tb = p->first_tb;
1161 #ifdef TARGET_HAS_PRECISE_SMC
1162 if (tb && pc != 0) {
1163 current_tb = tb_find_pc(pc);
1165 #endif
/* List links carry, in their two low bits, the page_next[] slot that
   continues the list.
   NOTE(review): the tag is extracted via (long), which is narrower than
   a pointer on LLP64 hosts -- confirm whether such hosts matter here. */
1166 while (tb != NULL) {
1167 n = (long)tb & 3;
1168 tb = (TranslationBlock *)((long)tb & ~3);
1169 #ifdef TARGET_HAS_PRECISE_SMC
1170 if (current_tb == tb &&
1171 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1172 /* If we are modifying the current TB, we must stop
1173 its execution. We could be more precise by checking
1174 that the modification is after the current PC, but it
1175 would require a specialized function to partially
1176 restore the CPU state */
1178 current_tb_modified = 1;
1179 cpu_restore_state(current_tb, env, pc, puc);
1180 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1181 &current_flags);
1183 #endif /* TARGET_HAS_PRECISE_SMC */
/* Passing 'addr' tells tb_phys_invalidate() to leave the list of the
   page at that address alone, so tb->page_next[n] can still be followed
   afterwards; the whole list is dropped below in one assignment. */
1184 tb_phys_invalidate(tb, addr);
1185 tb = tb->page_next[n];
1187 p->first_tb = NULL;
1188 #ifdef TARGET_HAS_PRECISE_SMC
1189 if (current_tb_modified) {
1190 /* we generate a block containing just the instruction
1191 modifying the memory. It will ensure that it cannot modify
1192 itself */
1193 env->current_tb = NULL;
1194 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1195 cpu_resume_from_signal(env, puc);
1197 #endif
1199 #endif
1201 /* add the tb in the target page and protect it if necessary */
/* 'n' is the index (0 or 1) of the page slot inside the TB; 'page_addr'
   is recorded in tb->page_addr[n].  The TB is pushed on the front of the
   page's TB list and the page's code bitmap is invalidated.
   In user-only builds a writable page is then made non-writable with
   mprotect(); otherwise tlb_protect_code() is called, but only when the
   page previously held no TB. */
1202 static inline void tb_alloc_page(TranslationBlock *tb,
1203 unsigned int n, tb_page_addr_t page_addr)
1205 PageDesc *p;
1206 TranslationBlock *last_first_tb;
1208 tb->page_addr[n] = page_addr;
1209 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1210 tb->page_next[n] = p->first_tb;
/* remember the previous head so we can tell below whether this is the
   first TB on the page */
1211 last_first_tb = p->first_tb;
/* the slot index is stored in the low bits of the list pointer */
1212 p->first_tb = (TranslationBlock *)((long)tb | n);
1213 invalidate_page_bitmap(p);
1215 #if defined(TARGET_HAS_SMC) || 1
1217 #if defined(CONFIG_USER_ONLY)
1218 if (p->flags & PAGE_WRITE) {
1219 target_ulong addr;
1220 PageDesc *p2;
1221 int prot;
1223 /* force the host page as non writable (writes will have a
1224 page fault + mprotect overhead) */
1225 page_addr &= qemu_host_page_mask;
1226 prot = 0;
/* a host page may cover several target pages: accumulate their flags
   and clear PAGE_WRITE on each of them */
1227 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1228 addr += TARGET_PAGE_SIZE) {
1230 p2 = page_find (addr >> TARGET_PAGE_BITS);
1231 if (!p2)
1232 continue;
1233 prot |= p2->flags;
1234 p2->flags &= ~PAGE_WRITE;
1236 mprotect(g2h(page_addr), qemu_host_page_size,
1237 (prot & PAGE_BITS) & ~PAGE_WRITE);
1238 #ifdef DEBUG_TB_INVALIDATE
1239 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1240 page_addr);
1241 #endif
1243 #else
1244 /* if some code is already present, then the pages are already
1245 protected. So we handle the case where only the first TB is
1246 allocated in a physical page */
1247 if (!last_first_tb) {
1248 tlb_protect_code(page_addr);
1250 #endif
1252 #endif /* TARGET_HAS_SMC */
1255 /* Allocate a new translation block. Flush the translation buffer if
1256 too many translation blocks or too much generated code. */
1257 TranslationBlock *tb_alloc(target_ulong pc)
1259 TranslationBlock *tb;
1261 if (nb_tbs >= code_gen_max_blocks ||
1262 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
1263 return NULL;
1264 tb = &tbs[nb_tbs++];
1265 tb->pc = pc;
1266 tb->cflags = 0;
1267 return tb;
1270 void tb_free(TranslationBlock *tb)
1272 /* In practice this is mostly used for single use temporary TB
1273 Ignore the hard cases and just back up if this TB happens to
1274 be the last one generated. */
1275 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
1276 code_gen_ptr = tb->tc_ptr;
1277 nb_tbs--;
1281 /* add a new TB and link it to the physical page tables. phys_page2 is
1282 (-1) to indicate that only one page contains the TB. */
1283 void tb_link_page(TranslationBlock *tb,
1284 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1286 unsigned int h;
1287 TranslationBlock **ptb;
1289 /* Grab the mmap lock to stop another thread invalidating this TB
1290 before we are done. */
1291 mmap_lock();
1292 /* add in the physical hash table */
1293 h = tb_phys_hash_func(phys_pc);
1294 ptb = &tb_phys_hash[h];
1295 tb->phys_hash_next = *ptb;
1296 *ptb = tb;
1298 /* add in the page list */
1299 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1300 if (phys_page2 != -1)
1301 tb_alloc_page(tb, 1, phys_page2);
1302 else
1303 tb->page_addr[1] = -1;
1305 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1306 tb->jmp_next[0] = NULL;
1307 tb->jmp_next[1] = NULL;
1309 /* init original jump addresses */
1310 if (tb->tb_next_offset[0] != 0xffff)
1311 tb_reset_jump(tb, 0);
1312 if (tb->tb_next_offset[1] != 0xffff)
1313 tb_reset_jump(tb, 1);
1315 #ifdef DEBUG_TB_CHECK
1316 tb_page_check();
1317 #endif
1318 mmap_unlock();
1321 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1322 tb[1].tc_ptr. Return NULL if not found */
1323 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1325 int m_min, m_max, m;
1326 unsigned long v;
1327 TranslationBlock *tb;
1329 if (nb_tbs <= 0)
1330 return NULL;
1331 if (tc_ptr < (unsigned long)code_gen_buffer ||
1332 tc_ptr >= (unsigned long)code_gen_ptr)
1333 return NULL;
1334 /* binary search (cf Knuth) */
1335 m_min = 0;
1336 m_max = nb_tbs - 1;
1337 while (m_min <= m_max) {
1338 m = (m_min + m_max) >> 1;
1339 tb = &tbs[m];
1340 v = (unsigned long)tb->tc_ptr;
1341 if (v == tc_ptr)
1342 return tb;
1343 else if (tc_ptr < v) {
1344 m_max = m - 1;
1345 } else {
1346 m_min = m + 1;
1349 return &tbs[m_max];
1352 static void tb_reset_jump_recursive(TranslationBlock *tb);
/* Undo the direct jump 'n' (0 or 1) of 'tb', if it is chained.
   The destination TB is found by following the jmp_next links until a
   pointer tagged with 2 is reached; 'tb' is then unlinked from that
   TB's jmp_first list, the jump in the generated code is reset, and the
   same treatment is applied recursively to the destination TB. */
1354 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1356 TranslationBlock *tb1, *tb_next, **ptb;
1357 unsigned int n1;
1359 tb1 = tb->jmp_next[n];
1360 if (tb1 != NULL) {
1361 /* find head of list */
1362 for(;;) {
/* the low two bits of each link carry a tag: 0/1 is the jmp_next slot
   to follow, 2 marks the list head */
1363 n1 = (long)tb1 & 3;
1364 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1365 if (n1 == 2)
1366 break;
1367 tb1 = tb1->jmp_next[n1];
1369 /* we are now sure that tb jumps to tb1 */
1370 tb_next = tb1;
1372 /* remove tb from the jmp_first list */
1373 ptb = &tb_next->jmp_first;
1374 for(;;) {
1375 tb1 = *ptb;
1376 n1 = (long)tb1 & 3;
1377 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1378 if (n1 == n && tb1 == tb)
1379 break;
1380 ptb = &tb1->jmp_next[n1];
/* splice tb's entry out of the list and clear its own link */
1382 *ptb = tb->jmp_next[n];
1383 tb->jmp_next[n] = NULL;
1385 /* suppress the jump to next tb in generated code */
1386 tb_reset_jump(tb, n);
1388 /* suppress jumps in the tb on which we could have jumped */
1389 tb_reset_jump_recursive(tb_next);
1393 static void tb_reset_jump_recursive(TranslationBlock *tb)
1395 tb_reset_jump_recursive2(tb, 0);
1396 tb_reset_jump_recursive2(tb, 1);
1399 #if defined(TARGET_HAS_ICE)
1400 #if defined(CONFIG_USER_ONLY)
1401 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1403 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1405 #else
1406 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1408 target_phys_addr_t addr;
1409 target_ulong pd;
1410 ram_addr_t ram_addr;
1411 PhysPageDesc *p;
1413 addr = cpu_get_phys_page_debug(env, pc);
1414 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1415 if (!p) {
1416 pd = IO_MEM_UNASSIGNED;
1417 } else {
1418 pd = p->phys_offset;
1420 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1421 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1423 #endif
1424 #endif /* TARGET_HAS_ICE */
1426 #if defined(CONFIG_USER_ONLY)
1427 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1432 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1433 int flags, CPUWatchpoint **watchpoint)
1435 return -ENOSYS;
1437 #else
1438 /* Add a watchpoint. */
1439 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1440 int flags, CPUWatchpoint **watchpoint)
1442 target_ulong len_mask = ~(len - 1);
1443 CPUWatchpoint *wp;
1445 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1446 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1447 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1448 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1449 return -EINVAL;
1451 wp = qemu_malloc(sizeof(*wp));
1453 wp->vaddr = addr;
1454 wp->len_mask = len_mask;
1455 wp->flags = flags;
1457 /* keep all GDB-injected watchpoints in front */
1458 if (flags & BP_GDB)
1459 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1460 else
1461 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1463 tlb_flush_page(env, addr);
1465 if (watchpoint)
1466 *watchpoint = wp;
1467 return 0;
1470 /* Remove a specific watchpoint. */
1471 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1472 int flags)
1474 target_ulong len_mask = ~(len - 1);
1475 CPUWatchpoint *wp;
1477 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1478 if (addr == wp->vaddr && len_mask == wp->len_mask
1479 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1480 cpu_watchpoint_remove_by_ref(env, wp);
1481 return 0;
1484 return -ENOENT;
1487 /* Remove a specific watchpoint by reference. */
1488 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1490 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1492 tlb_flush_page(env, watchpoint->vaddr);
1494 qemu_free(watchpoint);
1497 /* Remove all matching watchpoints. */
1498 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1500 CPUWatchpoint *wp, *next;
1502 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1503 if (wp->flags & mask)
1504 cpu_watchpoint_remove_by_ref(env, wp);
1507 #endif
1509 /* Add a breakpoint. */
1510 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1511 CPUBreakpoint **breakpoint)
1513 #if defined(TARGET_HAS_ICE)
1514 CPUBreakpoint *bp;
1516 bp = qemu_malloc(sizeof(*bp));
1518 bp->pc = pc;
1519 bp->flags = flags;
1521 /* keep all GDB-injected breakpoints in front */
1522 if (flags & BP_GDB)
1523 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1524 else
1525 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1527 breakpoint_invalidate(env, pc);
1529 if (breakpoint)
1530 *breakpoint = bp;
1531 return 0;
1532 #else
1533 return -ENOSYS;
1534 #endif
1537 /* Remove a specific breakpoint. */
1538 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1540 #if defined(TARGET_HAS_ICE)
1541 CPUBreakpoint *bp;
1543 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1544 if (bp->pc == pc && bp->flags == flags) {
1545 cpu_breakpoint_remove_by_ref(env, bp);
1546 return 0;
1549 return -ENOENT;
1550 #else
1551 return -ENOSYS;
1552 #endif
1555 /* Remove a specific breakpoint by reference. */
1556 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1558 #if defined(TARGET_HAS_ICE)
1559 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1561 breakpoint_invalidate(env, breakpoint->pc);
1563 qemu_free(breakpoint);
1564 #endif
1567 /* Remove all matching breakpoints. */
1568 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1570 #if defined(TARGET_HAS_ICE)
1571 CPUBreakpoint *bp, *next;
1573 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1574 if (bp->flags & mask)
1575 cpu_breakpoint_remove_by_ref(env, bp);
1577 #endif
1580 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1581 CPU loop after each instruction */
1582 void cpu_single_step(CPUState *env, int enabled)
1584 #if defined(TARGET_HAS_ICE)
1585 if (env->singlestep_enabled != enabled) {
1586 env->singlestep_enabled = enabled;
1587 if (kvm_enabled())
1588 kvm_update_guest_debug(env, 0);
1589 else {
1590 /* must flush all the translated code to avoid inconsistencies */
1591 /* XXX: only flush what is necessary */
1592 tb_flush(env);
1595 #endif
1598 /* enable or disable low levels log */
/* Stores 'log_flags' in the global loglevel.  If logging becomes enabled
   and no log file is open, logfilename is opened (append mode once
   log_append is set, truncate otherwise); the process exits via _exit(1)
   if that fails.  If logging becomes disabled while a file is open, the
   file is closed. */
1599 void cpu_set_log(int log_flags)
1601 loglevel = log_flags;
1602 if (loglevel && !logfile) {
1603 logfile = fopen(logfilename, log_append ? "a" : "w");
1604 if (!logfile) {
1605 perror(logfilename);
1606 _exit(1);
1608 #if !defined(CONFIG_SOFTMMU)
1609 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1611 static char logfile_buf[4096];
1612 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1614 #elif !defined(_WIN32)
1615 /* Win32 doesn't support line-buffering and requires size >= 2 */
1616 setvbuf(logfile, NULL, _IOLBF, 0);
1617 #endif
/* any later reopen appends instead of truncating */
1618 log_append = 1;
1620 if (!loglevel && logfile) {
1621 fclose(logfile);
1622 logfile = NULL;
1626 void cpu_set_log_filename(const char *filename)
1628 logfilename = strdup(filename);
1629 if (logfile) {
1630 fclose(logfile);
1631 logfile = NULL;
1633 cpu_set_log(loglevel);
1636 static void cpu_unlink_tb(CPUState *env)
1638 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1639 problem and hope the cpu will stop of its own accord. For userspace
1640 emulation this often isn't actually as bad as it sounds. Often
1641 signals are used primarily to interrupt blocking syscalls. */
1642 TranslationBlock *tb;
1643 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1645 spin_lock(&interrupt_lock);
1646 tb = env->current_tb;
1647 /* if the cpu is currently executing code, we must unlink it and
1648 all the potentially executing TB */
1649 if (tb) {
1650 env->current_tb = NULL;
1651 tb_reset_jump_recursive(tb);
1653 spin_unlock(&interrupt_lock);
1656 /* mask must never be zero, except for A20 change call */
1657 void cpu_interrupt(CPUState *env, int mask)
1659 int old_mask;
1661 old_mask = env->interrupt_request;
1662 env->interrupt_request |= mask;
1664 #ifndef CONFIG_USER_ONLY
1666 * If called from iothread context, wake the target cpu in
1667 * case its halted.
1669 if (!qemu_cpu_self(env)) {
1670 qemu_cpu_kick(env);
1671 return;
1673 #endif
1675 if (use_icount) {
1676 env->icount_decr.u16.high = 0xffff;
1677 #ifndef CONFIG_USER_ONLY
1678 if (!can_do_io(env)
1679 && (mask & ~old_mask) != 0) {
1680 cpu_abort(env, "Raised interrupt while not in I/O function");
1682 #endif
1683 } else {
1684 cpu_unlink_tb(env);
1688 void cpu_reset_interrupt(CPUState *env, int mask)
1690 env->interrupt_request &= ~mask;
1693 void cpu_exit(CPUState *env)
1695 env->exit_request = 1;
1696 cpu_unlink_tb(env);
/* Table of selectable log categories: { mask bit, name, help text }.
   The table ends with an all-zero entry; cpu_str_to_log_mask() stops at
   the first entry whose mask is 0. */
1699 const CPULogItem cpu_log_items[] = {
1700 { CPU_LOG_TB_OUT_ASM, "out_asm",
1701 "show generated host assembly code for each compiled TB" },
1702 { CPU_LOG_TB_IN_ASM, "in_asm",
1703 "show target assembly code for each compiled TB" },
1704 { CPU_LOG_TB_OP, "op",
1705 "show micro ops for each compiled TB" },
1706 { CPU_LOG_TB_OP_OPT, "op_opt",
1707 "show micro ops "
1708 #ifdef TARGET_I386
1709 "before eflags optimization and "
1710 #endif
1711 "after liveness analysis" },
1712 { CPU_LOG_INT, "int",
1713 "show interrupts/exceptions in short format" },
1714 { CPU_LOG_EXEC, "exec",
1715 "show trace before each executed TB (lots of logs)" },
1716 { CPU_LOG_TB_CPU, "cpu",
1717 "show CPU state before block translation" },
1718 #ifdef TARGET_I386
1719 { CPU_LOG_PCALL, "pcall",
1720 "show protected mode far calls/returns/exceptions" },
1721 { CPU_LOG_RESET, "cpu_reset",
1722 "show CPU state before CPU resets" },
1723 #endif
1724 #ifdef DEBUG_IOPORT
1725 { CPU_LOG_IOPORT, "ioport",
1726 "show all i/o ports accesses" },
1727 #endif
/* terminator */
1728 { 0, NULL, NULL },
1731 #ifndef CONFIG_USER_ONLY
/* List of registered CPUPhysMemoryClient objects; the cpu_notify_*()
   helpers below iterate over it, and clients are added and removed by
   cpu_register_phys_memory_client() / cpu_unregister_phys_memory_client(). */
1732 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1733 = QLIST_HEAD_INITIALIZER(memory_client_list);
1735 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1736 ram_addr_t size,
1737 ram_addr_t phys_offset)
1739 CPUPhysMemoryClient *client;
1740 QLIST_FOREACH(client, &memory_client_list, list) {
1741 client->set_memory(client, start_addr, size, phys_offset);
1745 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1746 target_phys_addr_t end)
1748 CPUPhysMemoryClient *client;
1749 QLIST_FOREACH(client, &memory_client_list, list) {
1750 int r = client->sync_dirty_bitmap(client, start, end);
1751 if (r < 0)
1752 return r;
1754 return 0;
1757 static int cpu_notify_migration_log(int enable)
1759 CPUPhysMemoryClient *client;
1760 QLIST_FOREACH(client, &memory_client_list, list) {
1761 int r = client->migration_log(client, enable);
1762 if (r < 0)
1763 return r;
1765 return 0;
1768 static void phys_page_for_each_1(CPUPhysMemoryClient *client,
1769 int level, void **lp)
1771 int i;
1773 if (*lp == NULL) {
1774 return;
1776 if (level == 0) {
1777 PhysPageDesc *pd = *lp;
1778 for (i = 0; i < L2_SIZE; ++i) {
1779 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1780 client->set_memory(client, pd[i].region_offset,
1781 TARGET_PAGE_SIZE, pd[i].phys_offset);
1784 } else {
1785 void **pp = *lp;
1786 for (i = 0; i < L2_SIZE; ++i) {
1787 phys_page_for_each_1(client, level - 1, pp + i);
1792 static void phys_page_for_each(CPUPhysMemoryClient *client)
1794 int i;
1795 for (i = 0; i < P_L1_SIZE; ++i) {
1796 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1797 l1_phys_map + 1);
1801 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1803 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1804 phys_page_for_each(client);
1807 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1809 QLIST_REMOVE(client, list);
1811 #endif
/* Return non-zero when the first 'n' bytes of 's1' are exactly the
   NUL-terminated string 's2' (i.e. s2 has length n and the bytes match),
   0 otherwise.  's1' need not be NUL-terminated at position n.

   A negative 'n' can never match and is rejected up front, so the
   length is never silently converted from a negative int to a huge
   size_t in the strlen() comparison or the memcmp() call. */
static int cmp1(const char *s1, int n, const char *s2)
{
    size_t len;

    if (n < 0) {
        return 0;
    }
    len = (size_t)n;
    if (strlen(s2) != len) {
        return 0;
    }
    return memcmp(s1, s2, len) == 0;
}
1820 /* takes a comma separated list of log masks. Return 0 if error. */
1821 int cpu_str_to_log_mask(const char *str)
1823 const CPULogItem *item;
1824 int mask;
1825 const char *p, *p1;
1827 p = str;
1828 mask = 0;
1829 for(;;) {
1830 p1 = strchr(p, ',');
1831 if (!p1)
1832 p1 = p + strlen(p);
1833 if(cmp1(p,p1-p,"all")) {
1834 for(item = cpu_log_items; item->mask != 0; item++) {
1835 mask |= item->mask;
1837 } else {
1838 for(item = cpu_log_items; item->mask != 0; item++) {
1839 if (cmp1(p, p1 - p, item->name))
1840 goto found;
1842 return 0;
1844 found:
1845 mask |= item->mask;
1846 if (*p1 != ',')
1847 break;
1848 p = p1 + 1;
1850 return mask;
1853 void cpu_abort(CPUState *env, const char *fmt, ...)
1855 va_list ap;
1856 va_list ap2;
1858 va_start(ap, fmt);
1859 va_copy(ap2, ap);
1860 fprintf(stderr, "qemu: fatal: ");
1861 vfprintf(stderr, fmt, ap);
1862 fprintf(stderr, "\n");
1863 #ifdef TARGET_I386
1864 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1865 #else
1866 cpu_dump_state(env, stderr, fprintf, 0);
1867 #endif
1868 if (qemu_log_enabled()) {
1869 qemu_log("qemu: fatal: ");
1870 qemu_log_vprintf(fmt, ap2);
1871 qemu_log("\n");
1872 #ifdef TARGET_I386
1873 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1874 #else
1875 log_cpu_state(env, 0);
1876 #endif
1877 qemu_log_flush();
1878 qemu_log_close();
1880 va_end(ap2);
1881 va_end(ap);
1882 #if defined(CONFIG_USER_ONLY)
1884 struct sigaction act;
1885 sigfillset(&act.sa_mask);
1886 act.sa_handler = SIG_DFL;
1887 sigaction(SIGABRT, &act, NULL);
1889 #endif
1890 abort();
1893 CPUState *cpu_copy(CPUState *env)
1895 CPUState *new_env = cpu_init(env->cpu_model_str);
1896 CPUState *next_cpu = new_env->next_cpu;
1897 int cpu_index = new_env->cpu_index;
1898 #if defined(TARGET_HAS_ICE)
1899 CPUBreakpoint *bp;
1900 CPUWatchpoint *wp;
1901 #endif
1903 memcpy(new_env, env, sizeof(CPUState));
1905 /* Preserve chaining and index. */
1906 new_env->next_cpu = next_cpu;
1907 new_env->cpu_index = cpu_index;
1909 /* Clone all break/watchpoints.
1910 Note: Once we support ptrace with hw-debug register access, make sure
1911 BP_CPU break/watchpoints are handled correctly on clone. */
1912 QTAILQ_INIT(&env->breakpoints);
1913 QTAILQ_INIT(&env->watchpoints);
1914 #if defined(TARGET_HAS_ICE)
1915 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1916 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1918 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1919 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1920 wp->flags, NULL);
1922 #endif
1924 return new_env;
1927 #if !defined(CONFIG_USER_ONLY)
1929 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1931 unsigned int i;
1933 /* Discard jump cache entries for any tb which might potentially
1934 overlap the flushed page. */
1935 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1936 memset (&env->tb_jmp_cache[i], 0,
1937 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1939 i = tb_jmp_cache_hash_page(addr);
1940 memset (&env->tb_jmp_cache[i], 0,
1941 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
/* Template for an unused TLB slot: every address field is -1.
   tlb_flush() and tlb_flush_entry() copy it over entries they clear. */
1944 static CPUTLBEntry s_cputlb_empty_entry = {
1945 .addr_read = -1,
1946 .addr_write = -1,
1947 .addr_code = -1,
1948 .addend = -1,
1951 /* NOTE: if flush_global is true, also flush global entries (not
1952 implemented yet) */
1953 void tlb_flush(CPUState *env, int flush_global)
1955 int i;
1957 #if defined(DEBUG_TLB)
1958 printf("tlb_flush:\n");
1959 #endif
1960 /* must reset current TB so that interrupts cannot modify the
1961 links while we are modifying them */
1962 env->current_tb = NULL;
1964 for(i = 0; i < CPU_TLB_SIZE; i++) {
1965 int mmu_idx;
1966 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1967 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1971 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1973 env->tlb_flush_addr = -1;
1974 env->tlb_flush_mask = 0;
1975 tlb_flush_count++;
1978 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1980 if (addr == (tlb_entry->addr_read &
1981 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1982 addr == (tlb_entry->addr_write &
1983 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1984 addr == (tlb_entry->addr_code &
1985 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1986 *tlb_entry = s_cputlb_empty_entry;
1990 void tlb_flush_page(CPUState *env, target_ulong addr)
1992 int i;
1993 int mmu_idx;
1995 #if defined(DEBUG_TLB)
1996 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1997 #endif
1998 /* Check if we need to flush due to large pages. */
1999 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2000 #if defined(DEBUG_TLB)
2001 printf("tlb_flush_page: forced full flush ("
2002 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2003 env->tlb_flush_addr, env->tlb_flush_mask);
2004 #endif
2005 tlb_flush(env, 1);
2006 return;
2008 /* must reset current TB so that interrupts cannot modify the
2009 links while we are modifying them */
2010 env->current_tb = NULL;
2012 addr &= TARGET_PAGE_MASK;
2013 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2014 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2015 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2017 tlb_flush_jmp_cache(env, addr);
2020 /* update the TLBs so that writes to code in the virtual page 'addr'
2021 can be detected */
2022 static void tlb_protect_code(ram_addr_t ram_addr)
2024 cpu_physical_memory_reset_dirty(ram_addr,
2025 ram_addr + TARGET_PAGE_SIZE,
2026 CODE_DIRTY_FLAG);
2029 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2030 tested for self modifying code */
2031 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2032 target_ulong vaddr)
2034 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2037 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2038 unsigned long start, unsigned long length)
2040 unsigned long addr;
2041 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2042 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2043 if ((addr - start) < length) {
2044 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2049 /* Note: start and end must be within the same ram block. */
2050 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2051 int dirty_flags)
2053 CPUState *env;
2054 unsigned long length, start1;
2055 int i;
2057 start &= TARGET_PAGE_MASK;
2058 end = TARGET_PAGE_ALIGN(end);
2060 length = end - start;
2061 if (length == 0)
2062 return;
2063 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2065 /* we modify the TLB cache so that the dirty bit will be set again
2066 when accessing the range */
2067 start1 = (unsigned long)qemu_get_ram_ptr(start);
2068 /* Chek that we don't span multiple blocks - this breaks the
2069 address comparisons below. */
2070 if ((unsigned long)qemu_get_ram_ptr(end - 1) - start1
2071 != (end - 1) - start) {
2072 abort();
2075 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2076 int mmu_idx;
2077 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2078 for(i = 0; i < CPU_TLB_SIZE; i++)
2079 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2080 start1, length);
2085 int cpu_physical_memory_set_dirty_tracking(int enable)
2087 int ret = 0;
2088 in_migration = enable;
2089 ret = cpu_notify_migration_log(!!enable);
2090 return ret;
2093 int cpu_physical_memory_get_dirty_tracking(void)
2095 return in_migration;
2098 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2099 target_phys_addr_t end_addr)
2101 int ret;
2103 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2104 return ret;
2107 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2109 ram_addr_t ram_addr;
2110 void *p;
2112 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2113 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2114 + tlb_entry->addend);
2115 ram_addr = qemu_ram_addr_from_host(p);
2116 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2117 tlb_entry->addr_write |= TLB_NOTDIRTY;
2122 /* update the TLB according to the current state of the dirty bits */
2123 void cpu_tlb_update_dirty(CPUState *env)
2125 int i;
2126 int mmu_idx;
2127 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2128 for(i = 0; i < CPU_TLB_SIZE; i++)
2129 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2133 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2135 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2136 tlb_entry->addr_write = vaddr;
2139 /* update the TLB corresponding to virtual page vaddr
2140 so that it is no longer dirty */
2141 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2143 int i;
2144 int mmu_idx;
2146 vaddr &= TARGET_PAGE_MASK;
2147 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2148 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2149 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2152 /* Our TLB does not support large pages, so remember the area covered by
2153 large pages and trigger a full TLB flush if these are invalidated. */
2154 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2155 target_ulong size)
2157 target_ulong mask = ~(size - 1);
2159 if (env->tlb_flush_addr == (target_ulong)-1) {
2160 env->tlb_flush_addr = vaddr & mask;
2161 env->tlb_flush_mask = mask;
2162 return;
2164 /* Extend the existing region to include the new page.
2165 This is a compromise between unnecessary flushes and the cost
2166 of maintaining a full variable size TLB. */
2167 mask &= env->tlb_flush_mask;
2168 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2169 mask <<= 1;
2171 env->tlb_flush_addr &= mask;
2172 env->tlb_flush_mask = mask;
2175 /* Add a new TLB entry. At most one entry for a given virtual address
2176 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2177 supplied size is only used by tlb_flush_page. */
/* 'prot' selects which of addr_read / addr_code / addr_write are filled
   in (PAGE_READ / PAGE_EXEC / PAGE_WRITE); the others are set to -1.
   'size' must be at least TARGET_PAGE_SIZE (asserted); a larger value is
   recorded through tlb_add_large_page(). */
2178 void tlb_set_page(CPUState *env, target_ulong vaddr,
2179 target_phys_addr_t paddr, int prot,
2180 int mmu_idx, target_ulong size)
2182 PhysPageDesc *p;
2183 unsigned long pd;
2184 unsigned int index;
2185 target_ulong address;
2186 target_ulong code_address;
2187 unsigned long addend;
2188 CPUTLBEntry *te;
2189 CPUWatchpoint *wp;
2190 target_phys_addr_t iotlb;
2192 assert(size >= TARGET_PAGE_SIZE);
2193 if (size != TARGET_PAGE_SIZE) {
2194 tlb_add_large_page(env, vaddr, size);
/* fetch the physical page descriptor; a missing one is treated as
   unassigned I/O memory */
2196 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2197 if (!p) {
2198 pd = IO_MEM_UNASSIGNED;
2199 } else {
2200 pd = p->phys_offset;
2202 #if defined(DEBUG_TLB)
2203 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x%08x prot=%x idx=%d smmu=%d pd=0x%08lx\n",
2204 vaddr, (int)paddr, prot, mmu_idx, is_softmmu, pd);
2205 #endif
2207 address = vaddr;
2208 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2209 /* IO memory case (romd handled later) */
2210 address |= TLB_MMIO;
2212 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2213 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2214 /* Normal RAM. */
2215 iotlb = pd & TARGET_PAGE_MASK;
2216 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2217 iotlb |= IO_MEM_NOTDIRTY;
2218 else
2219 iotlb |= IO_MEM_ROM;
2220 } else {
2221 /* IO handlers are currently passed a physical address.
2222 It would be nice to pass an offset from the base address
2223 of that region. This would avoid having to special case RAM,
2224 and avoid full address decoding in every device.
2225 We can't use the high bits of pd for this because
2226 IO_MEM_ROMD uses these as a ram address. */
2227 iotlb = (pd & ~TARGET_PAGE_MASK);
2228 if (p) {
2229 iotlb += p->region_offset;
2230 } else {
2231 iotlb += paddr;
/* snapshot the address before the watchpoint loop below may add
   TLB_MMIO: code fetches use this value */
2235 code_address = address;
2236 /* Make accesses to pages with watchpoints go via the
2237 watchpoint trap routines. */
2238 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2239 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2240 iotlb = io_mem_watch + paddr;
2241 /* TODO: The memory case can be optimized by not trapping
2242 reads of pages with a write breakpoint. */
2243 address |= TLB_MMIO;
/* fill in the TLB slot selected by the virtual page number; iotlb and
   addend are stored relative to vaddr */
2247 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2248 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2249 te = &env->tlb_table[mmu_idx][index];
2250 te->addend = addend - vaddr;
2251 if (prot & PAGE_READ) {
2252 te->addr_read = address;
2253 } else {
2254 te->addr_read = -1;
2257 if (prot & PAGE_EXEC) {
2258 te->addr_code = code_address;
2259 } else {
2260 te->addr_code = -1;
2262 if (prot & PAGE_WRITE) {
2263 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2264 (pd & IO_MEM_ROMD)) {
2265 /* Write access calls the I/O callback. */
2266 te->addr_write = address | TLB_MMIO;
2267 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2268 !cpu_physical_memory_is_dirty(pd)) {
2269 te->addr_write = address | TLB_NOTDIRTY;
2270 } else {
2271 te->addr_write = address;
2273 } else {
2274 te->addr_write = -1;
2278 #else
2280 void tlb_flush(CPUState *env, int flush_global)
2284 void tlb_flush_page(CPUState *env, target_ulong addr)
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */
/* State carried through one walk_memory_regions() traversal. */
2293 struct walk_memory_regions_data
/* callback invoked for each completed region */
2295 walk_memory_regions_fn fn;
/* opaque pointer handed back to 'fn' */
2296 void *priv;
/* start address of the region being accumulated, or -1ul when none */
2297 unsigned long start;
/* protection flags of the region being accumulated */
2298 int prot;
2301 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2302 abi_ulong end, int new_prot)
2304 if (data->start != -1ul) {
2305 int rc = data->fn(data->priv, data->start, end, data->prot);
2306 if (rc != 0) {
2307 return rc;
2311 data->start = (new_prot ? end : -1ul);
2312 data->prot = new_prot;
2314 return 0;
2317 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2318 abi_ulong base, int level, void **lp)
2320 abi_ulong pa;
2321 int i, rc;
2323 if (*lp == NULL) {
2324 return walk_memory_regions_end(data, base, 0);
2327 if (level == 0) {
2328 PageDesc *pd = *lp;
2329 for (i = 0; i < L2_SIZE; ++i) {
2330 int prot = pd[i].flags;
2332 pa = base | (i << TARGET_PAGE_BITS);
2333 if (prot != data->prot) {
2334 rc = walk_memory_regions_end(data, pa, prot);
2335 if (rc != 0) {
2336 return rc;
2340 } else {
2341 void **pp = *lp;
2342 for (i = 0; i < L2_SIZE; ++i) {
2343 pa = base | ((abi_ulong)i <<
2344 (TARGET_PAGE_BITS + L2_BITS * level));
2345 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2346 if (rc != 0) {
2347 return rc;
2352 return 0;
2355 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2357 struct walk_memory_regions_data data;
2358 unsigned long i;
2360 data.fn = fn;
2361 data.priv = priv;
2362 data.start = -1ul;
2363 data.prot = 0;
2365 for (i = 0; i < V_L1_SIZE; i++) {
2366 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2367 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2368 if (rc != 0) {
2369 return rc;
2373 return walk_memory_regions_end(&data, 0, 0);
2376 static int dump_region(void *priv, abi_ulong start,
2377 abi_ulong end, unsigned long prot)
2379 FILE *f = (FILE *)priv;
2381 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2382 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2383 start, end, end - start,
2384 ((prot & PAGE_READ) ? 'r' : '-'),
2385 ((prot & PAGE_WRITE) ? 'w' : '-'),
2386 ((prot & PAGE_EXEC) ? 'x' : '-'));
2388 return (0);
2391 /* dump memory mappings */
2392 void page_dump(FILE *f)
2394 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2395 "start", "end", "size", "prot");
2396 walk_memory_regions(f, dump_region);
2399 int page_get_flags(target_ulong address)
2401 PageDesc *p;
2403 p = page_find(address >> TARGET_PAGE_BITS);
2404 if (!p)
2405 return 0;
2406 return p->flags;
2409 /* Modify the flags of a page and invalidate the code if necessary.
2410 The flag PAGE_WRITE_ORG is positioned automatically depending
2411 on PAGE_WRITE. The mmap_lock should already be held. */
2412 void page_set_flags(target_ulong start, target_ulong end, int flags)
2414 target_ulong addr, len;
2416 /* This function should never be called with addresses outside the
2417 guest address space. If this assert fires, it probably indicates
2418 a missing call to h2g_valid. */
2419 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2420 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2421 #endif
2422 assert(start < end);
2424 start = start & TARGET_PAGE_MASK;
2425 end = TARGET_PAGE_ALIGN(end);
2427 if (flags & PAGE_WRITE) {
2428 flags |= PAGE_WRITE_ORG;
2431 for (addr = start, len = end - start;
2432 len != 0;
2433 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2434 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2436 /* If the write protection bit is set, then we invalidate
2437 the code inside. */
2438 if (!(p->flags & PAGE_WRITE) &&
2439 (flags & PAGE_WRITE) &&
2440 p->first_tb) {
2441 tb_invalidate_phys_page(addr, 0, NULL);
2443 p->flags = flags;
2447 int page_check_range(target_ulong start, target_ulong len, int flags)
2449 PageDesc *p;
2450 target_ulong end;
2451 target_ulong addr;
2453 /* This function should never be called with addresses outside the
2454 guest address space. If this assert fires, it probably indicates
2455 a missing call to h2g_valid. */
2456 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2457 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2458 #endif
2460 if (start + len - 1 < start) {
2461 /* We've wrapped around. */
2462 return -1;
2465 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2466 start = start & TARGET_PAGE_MASK;
2468 for (addr = start, len = end - start;
2469 len != 0;
2470 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2471 p = page_find(addr >> TARGET_PAGE_BITS);
2472 if( !p )
2473 return -1;
2474 if( !(p->flags & PAGE_VALID) )
2475 return -1;
2477 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2478 return -1;
2479 if (flags & PAGE_WRITE) {
2480 if (!(p->flags & PAGE_WRITE_ORG))
2481 return -1;
2482 /* unprotect the page if it was put read-only because it
2483 contains translated code */
2484 if (!(p->flags & PAGE_WRITE)) {
2485 if (!page_unprotect(addr, 0, NULL))
2486 return -1;
2488 return 0;
2491 return 0;
2494 /* called from signal handler: invalidate the code and unprotect the
2495 page. Return TRUE if the fault was successfully handled. */
2496 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2498 unsigned int prot;
2499 PageDesc *p;
2500 target_ulong host_start, host_end, addr;
2502 /* Technically this isn't safe inside a signal handler. However we
2503 know this only ever happens in a synchronous SEGV handler, so in
2504 practice it seems to be ok. */
2505 mmap_lock();
2507 p = page_find(address >> TARGET_PAGE_BITS);
2508 if (!p) {
2509 mmap_unlock();
2510 return 0;
2513 /* if the page was really writable, then we change its
2514 protection back to writable */
2515 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2516 host_start = address & qemu_host_page_mask;
2517 host_end = host_start + qemu_host_page_size;
2519 prot = 0;
2520 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2521 p = page_find(addr >> TARGET_PAGE_BITS);
2522 p->flags |= PAGE_WRITE;
2523 prot |= p->flags;
2525 /* and since the content will be modified, we must invalidate
2526 the corresponding translated code. */
2527 tb_invalidate_phys_page(addr, pc, puc);
2528 #ifdef DEBUG_TB_CHECK
2529 tb_invalidate_check(addr);
2530 #endif
2532 mprotect((void *)g2h(host_start), qemu_host_page_size,
2533 prot & PAGE_BITS);
2535 mmap_unlock();
2536 return 1;
2538 mmap_unlock();
2539 return 0;
2542 static inline void tlb_set_dirty(CPUState *env,
2543 unsigned long addr, target_ulong vaddr)
2546 #endif /* defined(CONFIG_USER_ONLY) */
2548 #if !defined(CONFIG_USER_ONLY)
2550 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2551 typedef struct subpage_t {
2552 target_phys_addr_t base;
2553 CPUReadMemoryFunc * const *mem_read[TARGET_PAGE_SIZE][4];
2554 CPUWriteMemoryFunc * const *mem_write[TARGET_PAGE_SIZE][4];
2555 void *opaque[TARGET_PAGE_SIZE][2][4];
2556 ram_addr_t region_offset[TARGET_PAGE_SIZE][2][4];
2557 } subpage_t;
2559 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2560 ram_addr_t memory, ram_addr_t region_offset);
2561 static void *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2562 ram_addr_t orig_memory, ram_addr_t region_offset);
/*
 * Work out which part of the page at 'addr' is covered by a registration
 * that starts at 'start_addr'.
 *
 * Outputs: 'start_addr2' and 'end_addr2' receive the first and last byte
 * offset inside the page; 'need_subpage' is set to 1 when the
 * registration does not cover the page completely (it is never cleared).
 *
 * NOTE: the macro reads a variable named 'orig_size' from the expansion
 * site, and the 'end_addr' parameter is accepted but not used.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments are evaluated as a unit.
 */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                     \
    do {                                                                \
        if ((addr) > (start_addr))                                      \
            (start_addr2) = 0;                                          \
        else {                                                          \
            (start_addr2) = (start_addr) & ~TARGET_PAGE_MASK;           \
            if ((start_addr2) > 0)                                      \
                (need_subpage) = 1;                                     \
        }                                                               \
                                                                        \
        if (((start_addr) + orig_size) - (addr) >= TARGET_PAGE_SIZE)    \
            (end_addr2) = TARGET_PAGE_SIZE - 1;                         \
        else {                                                          \
            (end_addr2) = ((start_addr) + orig_size - 1) &              \
                          ~TARGET_PAGE_MASK;                            \
            if ((end_addr2) < TARGET_PAGE_SIZE - 1)                     \
                (need_subpage) = 1;                                     \
        }                                                               \
    } while (0)
2583 /* register physical memory.
2584 For RAM, 'size' must be a multiple of the target page size.
2585 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2586 io memory page. The address used when calling the IO function is
2587 the offset from the start of the region, plus region_offset. Both
2588 start_addr and region_offset are rounded down to a page boundary
2589 before calculating this offset. This should not be a problem unless
2590 the low bits of start_addr and region_offset differ. */
2591 void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
2592 ram_addr_t size,
2593 ram_addr_t phys_offset,
2594 ram_addr_t region_offset)
2596 target_phys_addr_t addr, end_addr;
2597 PhysPageDesc *p;
2598 CPUState *env;
2599 ram_addr_t orig_size = size;
2600 void *subpage;
2602 cpu_notify_set_memory(start_addr, size, phys_offset);
2604 if (phys_offset == IO_MEM_UNASSIGNED) {
2605 region_offset = start_addr;
2607 region_offset &= TARGET_PAGE_MASK;
2608 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2609 end_addr = start_addr + (target_phys_addr_t)size;
2610 for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
2611 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2612 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2613 ram_addr_t orig_memory = p->phys_offset;
2614 target_phys_addr_t start_addr2, end_addr2;
2615 int need_subpage = 0;
2617 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2618 need_subpage);
2619 if (need_subpage || phys_offset & IO_MEM_SUBWIDTH) {
2620 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2621 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2622 &p->phys_offset, orig_memory,
2623 p->region_offset);
2624 } else {
2625 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2626 >> IO_MEM_SHIFT];
2628 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2629 region_offset);
2630 p->region_offset = 0;
2631 } else {
2632 p->phys_offset = phys_offset;
2633 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2634 (phys_offset & IO_MEM_ROMD))
2635 phys_offset += TARGET_PAGE_SIZE;
2637 } else {
2638 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2639 p->phys_offset = phys_offset;
2640 p->region_offset = region_offset;
2641 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2642 (phys_offset & IO_MEM_ROMD)) {
2643 phys_offset += TARGET_PAGE_SIZE;
2644 } else {
2645 target_phys_addr_t start_addr2, end_addr2;
2646 int need_subpage = 0;
2648 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2649 end_addr2, need_subpage);
2651 if (need_subpage || phys_offset & IO_MEM_SUBWIDTH) {
2652 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2653 &p->phys_offset, IO_MEM_UNASSIGNED,
2654 addr & TARGET_PAGE_MASK);
2655 subpage_register(subpage, start_addr2, end_addr2,
2656 phys_offset, region_offset);
2657 p->region_offset = 0;
2661 region_offset += TARGET_PAGE_SIZE;
2664 /* since each CPU stores ram addresses in its TLB cache, we must
2665 reset the modified entries */
2666 /* XXX: slow ! */
2667 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2668 tlb_flush(env, 1);
2672 /* XXX: temporary until new memory mapping API */
2673 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2675 PhysPageDesc *p;
2677 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2678 if (!p)
2679 return IO_MEM_UNASSIGNED;
2680 return p->phys_offset;
2683 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2685 if (kvm_enabled())
2686 kvm_coalesce_mmio_region(addr, size);
2689 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2691 if (kvm_enabled())
2692 kvm_uncoalesce_mmio_region(addr, size);
/* Ask KVM to flush its coalesced MMIO buffer; nothing happens when KVM is
   not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2701 #if defined(__linux__) && !defined(TARGET_S390X)
2703 #include <sys/vfs.h>
2705 #define HUGETLBFS_MAGIC 0x958458f6
2707 static long gethugepagesize(const char *path)
2709 struct statfs fs;
2710 int ret;
2712 do {
2713 ret = statfs(path, &fs);
2714 } while (ret != 0 && errno == EINTR);
2716 if (ret != 0) {
2717 perror(path);
2718 return 0;
2721 if (fs.f_type != HUGETLBFS_MAGIC)
2722 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2724 return fs.f_bsize;
2727 static void *file_ram_alloc(ram_addr_t memory, const char *path)
2729 char *filename;
2730 void *area;
2731 int fd;
2732 #ifdef MAP_POPULATE
2733 int flags;
2734 #endif
2735 unsigned long hpagesize;
2737 hpagesize = gethugepagesize(path);
2738 if (!hpagesize) {
2739 return NULL;
2742 if (memory < hpagesize) {
2743 return NULL;
2746 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2747 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2748 return NULL;
2751 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2752 return NULL;
2755 fd = mkstemp(filename);
2756 if (fd < 0) {
2757 perror("unable to create backing store for hugepages");
2758 free(filename);
2759 return NULL;
2761 unlink(filename);
2762 free(filename);
2764 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2767 * ftruncate is not supported by hugetlbfs in older
2768 * hosts, so don't bother bailing out on errors.
2769 * If anything goes wrong with it under other filesystems,
2770 * mmap will fail.
2772 if (ftruncate(fd, memory))
2773 perror("ftruncate");
2775 #ifdef MAP_POPULATE
2776 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2777 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2778 * to sidestep this quirk.
2780 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2781 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2782 #else
2783 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2784 #endif
2785 if (area == MAP_FAILED) {
2786 perror("file_ram_alloc: can't mmap RAM pages");
2787 close(fd);
2788 return (NULL);
2790 return area;
2792 #endif
2794 ram_addr_t qemu_ram_alloc(ram_addr_t size)
2796 RAMBlock *new_block;
2798 size = TARGET_PAGE_ALIGN(size);
2799 new_block = qemu_malloc(sizeof(*new_block));
2801 if (mem_path) {
2802 #if defined (__linux__) && !defined(TARGET_S390X)
2803 new_block->host = file_ram_alloc(size, mem_path);
2804 if (!new_block->host)
2805 exit(1);
2806 #else
2807 fprintf(stderr, "-mem-path option unsupported\n");
2808 exit(1);
2809 #endif
2810 } else {
2811 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2812 /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
2813 new_block->host = mmap((void*)0x1000000, size,
2814 PROT_EXEC|PROT_READ|PROT_WRITE,
2815 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
2816 #else
2817 new_block->host = qemu_vmalloc(size);
2818 #endif
2819 #ifdef MADV_MERGEABLE
2820 madvise(new_block->host, size, MADV_MERGEABLE);
2821 #endif
2823 new_block->offset = last_ram_offset;
2824 new_block->length = size;
2826 new_block->next = ram_blocks;
2827 ram_blocks = new_block;
2829 phys_ram_dirty = qemu_realloc(phys_ram_dirty,
2830 (last_ram_offset + size) >> TARGET_PAGE_BITS);
2831 memset(phys_ram_dirty + (last_ram_offset >> TARGET_PAGE_BITS),
2832 0xff, size >> TARGET_PAGE_BITS);
2834 last_ram_offset += size;
2836 if (kvm_enabled())
2837 kvm_setup_guest_memory(new_block->host, size);
2839 return new_block->offset;
2842 void qemu_ram_free(ram_addr_t addr)
2844 /* TODO: implement this. */
2847 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2848 With the exception of the softmmu code in this file, this should
2849 only be used for local memory (e.g. video ram) that the device owns,
2850 and knows it isn't going to access beyond the end of the block.
2852 It should not be used for general purpose DMA.
2853 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2855 void *qemu_get_ram_ptr(ram_addr_t addr)
2857 RAMBlock *prev;
2858 RAMBlock **prevp;
2859 RAMBlock *block;
2861 prev = NULL;
2862 prevp = &ram_blocks;
2863 block = ram_blocks;
2864 while (block && (block->offset > addr
2865 || block->offset + block->length <= addr)) {
2866 if (prev)
2867 prevp = &prev->next;
2868 prev = block;
2869 block = block->next;
2871 if (!block) {
2872 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2873 abort();
2875 /* Move this entry to to start of the list. */
2876 if (prev) {
2877 prev->next = block->next;
2878 block->next = *prevp;
2879 *prevp = block;
2881 return block->host + (addr - block->offset);
2884 /* Some of the softmmu routines need to translate from a host pointer
2885 (typically a TLB entry) back to a ram offset. */
2886 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2888 RAMBlock *prev;
2889 RAMBlock *block;
2890 uint8_t *host = ptr;
2892 prev = NULL;
2893 block = ram_blocks;
2894 while (block && (block->host > host
2895 || block->host + block->length <= host)) {
2896 prev = block;
2897 block = block->next;
2899 if (!block) {
2900 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2901 abort();
2903 return block->offset + (host - block->host);
2906 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
2908 #ifdef DEBUG_UNASSIGNED
2909 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2910 #endif
2911 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2912 do_unassigned_access(addr, 0, 0, 0, 1);
2913 #endif
2914 return 0;
2917 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
2919 #ifdef DEBUG_UNASSIGNED
2920 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2921 #endif
2922 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2923 do_unassigned_access(addr, 0, 0, 0, 2);
2924 #endif
2925 return 0;
2928 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
2930 #ifdef DEBUG_UNASSIGNED
2931 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2932 #endif
2933 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2934 do_unassigned_access(addr, 0, 0, 0, 4);
2935 #endif
2936 return 0;
2939 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
2941 #ifdef DEBUG_UNASSIGNED
2942 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2943 #endif
2944 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2945 do_unassigned_access(addr, 1, 0, 0, 1);
2946 #endif
2949 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
2951 #ifdef DEBUG_UNASSIGNED
2952 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2953 #endif
2954 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2955 do_unassigned_access(addr, 1, 0, 0, 2);
2956 #endif
2959 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
2961 #ifdef DEBUG_UNASSIGNED
2962 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
2963 #endif
2964 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2965 do_unassigned_access(addr, 1, 0, 0, 4);
2966 #endif
2969 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
2970 unassigned_mem_readb,
2971 unassigned_mem_readw,
2972 unassigned_mem_readl,
2975 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
2976 unassigned_mem_writeb,
2977 unassigned_mem_writew,
2978 unassigned_mem_writel,
2981 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
2982 uint32_t val)
2984 int dirty_flags;
2985 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2986 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2987 #if !defined(CONFIG_USER_ONLY)
2988 tb_invalidate_phys_page_fast(ram_addr, 1);
2989 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2990 #endif
2992 stb_p(qemu_get_ram_ptr(ram_addr), val);
2993 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2994 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2995 /* we remove the notdirty callback only if the code has been
2996 flushed */
2997 if (dirty_flags == 0xff)
2998 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3001 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3002 uint32_t val)
3004 int dirty_flags;
3005 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3006 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3007 #if !defined(CONFIG_USER_ONLY)
3008 tb_invalidate_phys_page_fast(ram_addr, 2);
3009 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3010 #endif
3012 stw_p(qemu_get_ram_ptr(ram_addr), val);
3013 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3014 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3015 /* we remove the notdirty callback only if the code has been
3016 flushed */
3017 if (dirty_flags == 0xff)
3018 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3021 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3022 uint32_t val)
3024 int dirty_flags;
3025 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3026 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3027 #if !defined(CONFIG_USER_ONLY)
3028 tb_invalidate_phys_page_fast(ram_addr, 4);
3029 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3030 #endif
3032 stl_p(qemu_get_ram_ptr(ram_addr), val);
3033 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3034 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3035 /* we remove the notdirty callback only if the code has been
3036 flushed */
3037 if (dirty_flags == 0xff)
3038 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3041 static CPUReadMemoryFunc * const error_mem_read[3] = {
3042 NULL, /* never used */
3043 NULL, /* never used */
3044 NULL, /* never used */
3047 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3048 notdirty_mem_writeb,
3049 notdirty_mem_writew,
3050 notdirty_mem_writel,
3053 /* Generate a debug exception if a watchpoint has been hit. */
3054 static void check_watchpoint(int offset, int len_mask, int flags)
3056 CPUState *env = cpu_single_env;
3057 target_ulong pc, cs_base;
3058 TranslationBlock *tb;
3059 target_ulong vaddr;
3060 CPUWatchpoint *wp;
3061 int cpu_flags;
3063 if (env->watchpoint_hit) {
3064 /* We re-entered the check after replacing the TB. Now raise
3065 * the debug interrupt so that is will trigger after the
3066 * current instruction. */
3067 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3068 return;
3070 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3071 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3072 if ((vaddr == (wp->vaddr & len_mask) ||
3073 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3074 wp->flags |= BP_WATCHPOINT_HIT;
3075 if (!env->watchpoint_hit) {
3076 env->watchpoint_hit = wp;
3077 tb = tb_find_pc(env->mem_io_pc);
3078 if (!tb) {
3079 cpu_abort(env, "check_watchpoint: could not find TB for "
3080 "pc=%p", (void *)env->mem_io_pc);
3082 cpu_restore_state(tb, env, env->mem_io_pc, NULL);
3083 tb_phys_invalidate(tb, -1);
3084 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3085 env->exception_index = EXCP_DEBUG;
3086 } else {
3087 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3088 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3090 cpu_resume_from_signal(env, NULL);
3092 } else {
3093 wp->flags &= ~BP_WATCHPOINT_HIT;
3098 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3099 so these check for a hit then pass through to the normal out-of-line
3100 phys routines. */
3101 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3103 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3104 return ldub_phys(addr);
3107 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3109 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3110 return lduw_phys(addr);
3113 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3115 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3116 return ldl_phys(addr);
3119 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3120 uint32_t val)
3122 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3123 stb_phys(addr, val);
3126 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3127 uint32_t val)
3129 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3130 stw_phys(addr, val);
3133 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3134 uint32_t val)
3136 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3137 stl_phys(addr, val);
3140 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3141 watch_mem_readb,
3142 watch_mem_readw,
3143 watch_mem_readl,
3146 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3147 watch_mem_writeb,
3148 watch_mem_writew,
3149 watch_mem_writel,
3152 static inline uint32_t subpage_readlen (subpage_t *mmio, target_phys_addr_t addr,
3153 unsigned int len)
3155 uint32_t ret;
3156 unsigned int idx;
3158 idx = SUBPAGE_IDX(addr);
3159 #if defined(DEBUG_SUBPAGE)
3160 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3161 mmio, len, addr, idx);
3162 #endif
3163 ret = (**mmio->mem_read[idx][len])(mmio->opaque[idx][0][len],
3164 addr + mmio->region_offset[idx][0][len]);
3166 return ret;
3169 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3170 uint32_t value, unsigned int len)
3172 unsigned int idx;
3174 idx = SUBPAGE_IDX(addr);
3175 #if defined(DEBUG_SUBPAGE)
3176 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n", __func__,
3177 mmio, len, addr, idx, value);
3178 #endif
3179 (**mmio->mem_write[idx][len])(mmio->opaque[idx][1][len],
3180 addr + mmio->region_offset[idx][1][len],
3181 value);
3184 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3186 #if defined(DEBUG_SUBPAGE)
3187 printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
3188 #endif
3190 return subpage_readlen(opaque, addr, 0);
3193 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3194 uint32_t value)
3196 #if defined(DEBUG_SUBPAGE)
3197 printf("%s: addr " TARGET_FMT_plx " val %08x\n", __func__, addr, value);
3198 #endif
3199 subpage_writelen(opaque, addr, value, 0);
3202 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3204 #if defined(DEBUG_SUBPAGE)
3205 printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
3206 #endif
3208 return subpage_readlen(opaque, addr, 1);
3211 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3212 uint32_t value)
3214 #if defined(DEBUG_SUBPAGE)
3215 printf("%s: addr " TARGET_FMT_plx " val %08x\n", __func__, addr, value);
3216 #endif
3217 subpage_writelen(opaque, addr, value, 1);
3220 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3222 #if defined(DEBUG_SUBPAGE)
3223 printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
3224 #endif
3226 return subpage_readlen(opaque, addr, 2);
3229 static void subpage_writel (void *opaque,
3230 target_phys_addr_t addr, uint32_t value)
3232 #if defined(DEBUG_SUBPAGE)
3233 printf("%s: addr " TARGET_FMT_plx " val %08x\n", __func__, addr, value);
3234 #endif
3235 subpage_writelen(opaque, addr, value, 2);
3238 static CPUReadMemoryFunc * const subpage_read[] = {
3239 &subpage_readb,
3240 &subpage_readw,
3241 &subpage_readl,
3244 static CPUWriteMemoryFunc * const subpage_write[] = {
3245 &subpage_writeb,
3246 &subpage_writew,
3247 &subpage_writel,
3250 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3251 ram_addr_t memory, ram_addr_t region_offset)
3253 int idx, eidx;
3254 unsigned int i;
3256 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3257 return -1;
3258 idx = SUBPAGE_IDX(start);
3259 eidx = SUBPAGE_IDX(end);
3260 #if defined(DEBUG_SUBPAGE)
3261 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3262 mmio, start, end, idx, eidx, memory);
3263 #endif
3264 memory >>= IO_MEM_SHIFT;
3265 for (; idx <= eidx; idx++) {
3266 for (i = 0; i < 4; i++) {
3267 if (io_mem_read[memory][i]) {
3268 mmio->mem_read[idx][i] = &io_mem_read[memory][i];
3269 mmio->opaque[idx][0][i] = io_mem_opaque[memory];
3270 mmio->region_offset[idx][0][i] = region_offset;
3272 if (io_mem_write[memory][i]) {
3273 mmio->mem_write[idx][i] = &io_mem_write[memory][i];
3274 mmio->opaque[idx][1][i] = io_mem_opaque[memory];
3275 mmio->region_offset[idx][1][i] = region_offset;
3280 return 0;
3283 static void *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3284 ram_addr_t orig_memory, ram_addr_t region_offset)
3286 subpage_t *mmio;
3287 int subpage_memory;
3289 mmio = qemu_mallocz(sizeof(subpage_t));
3291 mmio->base = base;
3292 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio);
3293 #if defined(DEBUG_SUBPAGE)
3294 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3295 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3296 #endif
3297 *phys = subpage_memory | IO_MEM_SUBPAGE;
3298 subpage_register(mmio, 0, TARGET_PAGE_SIZE - 1, orig_memory,
3299 region_offset);
3301 return mmio;
3304 static int get_free_io_mem_idx(void)
3306 int i;
3308 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3309 if (!io_mem_used[i]) {
3310 io_mem_used[i] = 1;
3311 return i;
3313 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3314 return -1;
3317 /* mem_read and mem_write are arrays of functions containing the
3318 function to access byte (index 0), word (index 1) and dword (index
3319 2). Functions can be omitted with a NULL function pointer.
3320 If io_index is non zero, the corresponding io zone is
3321 modified. If it is zero, a new io zone is allocated. The return
3322 value can be used with cpu_register_physical_memory(). (-1) is
3323 returned if error. */
3324 static int cpu_register_io_memory_fixed(int io_index,
3325 CPUReadMemoryFunc * const *mem_read,
3326 CPUWriteMemoryFunc * const *mem_write,
3327 void *opaque)
3329 int i, subwidth = 0;
3331 if (io_index <= 0) {
3332 io_index = get_free_io_mem_idx();
3333 if (io_index == -1)
3334 return io_index;
3335 } else {
3336 io_index >>= IO_MEM_SHIFT;
3337 if (io_index >= IO_MEM_NB_ENTRIES)
3338 return -1;
3341 for(i = 0;i < 3; i++) {
3342 if (!mem_read[i] || !mem_write[i])
3343 subwidth = IO_MEM_SUBWIDTH;
3344 io_mem_read[io_index][i] = mem_read[i];
3345 io_mem_write[io_index][i] = mem_write[i];
3347 io_mem_opaque[io_index] = opaque;
3348 return (io_index << IO_MEM_SHIFT) | subwidth;
3351 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3352 CPUWriteMemoryFunc * const *mem_write,
3353 void *opaque)
3355 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque);
3358 void cpu_unregister_io_memory(int io_table_address)
3360 int i;
3361 int io_index = io_table_address >> IO_MEM_SHIFT;
3363 for (i=0;i < 3; i++) {
3364 io_mem_read[io_index][i] = unassigned_mem_read[i];
3365 io_mem_write[io_index][i] = unassigned_mem_write[i];
3367 io_mem_opaque[io_index] = NULL;
3368 io_mem_used[io_index] = 0;
3371 static void io_mem_init(void)
3373 int i;
3375 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read, unassigned_mem_write, NULL);
3376 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read, unassigned_mem_write, NULL);
3377 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read, notdirty_mem_write, NULL);
3378 for (i=0; i<5; i++)
3379 io_mem_used[i] = 1;
3381 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3382 watch_mem_write, NULL);
3385 #endif /* !defined(CONFIG_USER_ONLY) */
3387 /* physical memory access (slow version, mainly for debug) */
3388 #if defined(CONFIG_USER_ONLY)
3389 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3390 uint8_t *buf, int len, int is_write)
3392 int l, flags;
3393 target_ulong page;
3394 void * p;
3396 while (len > 0) {
3397 page = addr & TARGET_PAGE_MASK;
3398 l = (page + TARGET_PAGE_SIZE) - addr;
3399 if (l > len)
3400 l = len;
3401 flags = page_get_flags(page);
3402 if (!(flags & PAGE_VALID))
3403 return -1;
3404 if (is_write) {
3405 if (!(flags & PAGE_WRITE))
3406 return -1;
3407 /* XXX: this code should not depend on lock_user */
3408 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3409 return -1;
3410 memcpy(p, buf, l);
3411 unlock_user(p, addr, l);
3412 } else {
3413 if (!(flags & PAGE_READ))
3414 return -1;
3415 /* XXX: this code should not depend on lock_user */
3416 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3417 return -1;
3418 memcpy(buf, p, l);
3419 unlock_user(p, addr, 0);
3421 len -= l;
3422 buf += l;
3423 addr += l;
3425 return 0;
3428 #else
3429 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3430 int len, int is_write)
3432 int l, io_index;
3433 uint8_t *ptr;
3434 uint32_t val;
3435 target_phys_addr_t page;
3436 unsigned long pd;
3437 PhysPageDesc *p;
3439 while (len > 0) {
3440 page = addr & TARGET_PAGE_MASK;
3441 l = (page + TARGET_PAGE_SIZE) - addr;
3442 if (l > len)
3443 l = len;
3444 p = phys_page_find(page >> TARGET_PAGE_BITS);
3445 if (!p) {
3446 pd = IO_MEM_UNASSIGNED;
3447 } else {
3448 pd = p->phys_offset;
3451 if (is_write) {
3452 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3453 target_phys_addr_t addr1 = addr;
3454 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3455 if (p)
3456 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3457 /* XXX: could force cpu_single_env to NULL to avoid
3458 potential bugs */
3459 if (l >= 4 && ((addr1 & 3) == 0)) {
3460 /* 32 bit write access */
3461 val = ldl_p(buf);
3462 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3463 l = 4;
3464 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3465 /* 16 bit write access */
3466 val = lduw_p(buf);
3467 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3468 l = 2;
3469 } else {
3470 /* 8 bit write access */
3471 val = ldub_p(buf);
3472 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3473 l = 1;
3475 } else {
3476 unsigned long addr1;
3477 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3478 /* RAM case */
3479 ptr = qemu_get_ram_ptr(addr1);
3480 memcpy(ptr, buf, l);
3481 if (!cpu_physical_memory_is_dirty(addr1)) {
3482 /* invalidate code */
3483 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3484 /* set dirty bit */
3485 cpu_physical_memory_set_dirty_flags(
3486 addr1, (0xff & ~CODE_DIRTY_FLAG));
3489 } else {
3490 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3491 !(pd & IO_MEM_ROMD)) {
3492 target_phys_addr_t addr1 = addr;
3493 /* I/O case */
3494 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3495 if (p)
3496 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3497 if (l >= 4 && ((addr1 & 3) == 0)) {
3498 /* 32 bit read access */
3499 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3500 stl_p(buf, val);
3501 l = 4;
3502 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3503 /* 16 bit read access */
3504 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3505 stw_p(buf, val);
3506 l = 2;
3507 } else {
3508 /* 8 bit read access */
3509 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3510 stb_p(buf, val);
3511 l = 1;
3513 } else {
3514 /* RAM case */
3515 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3516 (addr & ~TARGET_PAGE_MASK);
3517 memcpy(buf, ptr, l);
3520 len -= l;
3521 buf += l;
3522 addr += l;
3526 /* used for ROM loading : can write in RAM and ROM */
3527 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3528 const uint8_t *buf, int len)
3530 int l;
3531 uint8_t *ptr;
3532 target_phys_addr_t page;
3533 unsigned long pd;
3534 PhysPageDesc *p;
3536 while (len > 0) {
3537 page = addr & TARGET_PAGE_MASK;
3538 l = (page + TARGET_PAGE_SIZE) - addr;
3539 if (l > len)
3540 l = len;
3541 p = phys_page_find(page >> TARGET_PAGE_BITS);
3542 if (!p) {
3543 pd = IO_MEM_UNASSIGNED;
3544 } else {
3545 pd = p->phys_offset;
3548 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3549 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3550 !(pd & IO_MEM_ROMD)) {
3551 /* do nothing */
3552 } else {
3553 unsigned long addr1;
3554 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3555 /* ROM/RAM case */
3556 ptr = qemu_get_ram_ptr(addr1);
3557 memcpy(ptr, buf, l);
3559 len -= l;
3560 buf += l;
3561 addr += l;
3565 typedef struct {
3566 void *buffer;
3567 target_phys_addr_t addr;
3568 target_phys_addr_t len;
3569 } BounceBuffer;
3571 static BounceBuffer bounce;
3573 typedef struct MapClient {
3574 void *opaque;
3575 void (*callback)(void *opaque);
3576 QLIST_ENTRY(MapClient) link;
3577 } MapClient;
3579 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3580 = QLIST_HEAD_INITIALIZER(map_client_list);
3582 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3584 MapClient *client = qemu_malloc(sizeof(*client));
3586 client->opaque = opaque;
3587 client->callback = callback;
3588 QLIST_INSERT_HEAD(&map_client_list, client, link);
3589 return client;
3592 void cpu_unregister_map_client(void *_client)
3594 MapClient *client = (MapClient *)_client;
3596 QLIST_REMOVE(client, link);
3597 qemu_free(client);
3600 static void cpu_notify_map_clients(void)
3602 MapClient *client;
3604 while (!QLIST_EMPTY(&map_client_list)) {
3605 client = QLIST_FIRST(&map_client_list);
3606 client->callback(client->opaque);
3607 cpu_unregister_map_client(client);
3611 /* Map a physical memory region into a host virtual address.
3612 * May map a subset of the requested range, given by and returned in *plen.
3613 * May return NULL if resources needed to perform the mapping are exhausted.
3614 * Use only for reads OR writes - not for read-modify-write operations.
3615 * Use cpu_register_map_client() to know when retrying the map operation is
3616 * likely to succeed.
3618 void *cpu_physical_memory_map(target_phys_addr_t addr,
3619 target_phys_addr_t *plen,
3620 int is_write)
3622 target_phys_addr_t len = *plen;
3623 target_phys_addr_t done = 0;
3624 int l;
3625 uint8_t *ret = NULL;
3626 uint8_t *ptr;
3627 target_phys_addr_t page;
3628 unsigned long pd;
3629 PhysPageDesc *p;
3630 unsigned long addr1;
3632 while (len > 0) {
3633 page = addr & TARGET_PAGE_MASK;
3634 l = (page + TARGET_PAGE_SIZE) - addr;
3635 if (l > len)
3636 l = len;
3637 p = phys_page_find(page >> TARGET_PAGE_BITS);
3638 if (!p) {
3639 pd = IO_MEM_UNASSIGNED;
3640 } else {
3641 pd = p->phys_offset;
3644 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3645 if (done || bounce.buffer) {
3646 break;
3648 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3649 bounce.addr = addr;
3650 bounce.len = l;
3651 if (!is_write) {
3652 cpu_physical_memory_rw(addr, bounce.buffer, l, 0);
3654 ptr = bounce.buffer;
3655 } else {
3656 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3657 ptr = qemu_get_ram_ptr(addr1);
3659 if (!done) {
3660 ret = ptr;
3661 } else if (ret + done != ptr) {
3662 break;
3665 len -= l;
3666 addr += l;
3667 done += l;
3669 *plen = done;
3670 return ret;
3673 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3674 * Will also mark the memory as dirty if is_write == 1. access_len gives
3675 * the amount of memory that was actually read or written by the caller.
3677 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3678 int is_write, target_phys_addr_t access_len)
3680 if (buffer != bounce.buffer) {
3681 if (is_write) {
3682 ram_addr_t addr1 = qemu_ram_addr_from_host(buffer);
3683 while (access_len) {
3684 unsigned l;
3685 l = TARGET_PAGE_SIZE;
3686 if (l > access_len)
3687 l = access_len;
3688 if (!cpu_physical_memory_is_dirty(addr1)) {
3689 /* invalidate code */
3690 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3691 /* set dirty bit */
3692 cpu_physical_memory_set_dirty_flags(
3693 addr1, (0xff & ~CODE_DIRTY_FLAG));
3695 addr1 += l;
3696 access_len -= l;
3699 return;
3701 if (is_write) {
3702 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3704 qemu_vfree(bounce.buffer);
3705 bounce.buffer = NULL;
3706 cpu_notify_map_clients();
3709 /* warning: addr must be aligned */
3710 uint32_t ldl_phys(target_phys_addr_t addr)
3712 int io_index;
3713 uint8_t *ptr;
3714 uint32_t val;
3715 unsigned long pd;
3716 PhysPageDesc *p;
3718 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3719 if (!p) {
3720 pd = IO_MEM_UNASSIGNED;
3721 } else {
3722 pd = p->phys_offset;
3725 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3726 !(pd & IO_MEM_ROMD)) {
3727 /* I/O case */
3728 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3729 if (p)
3730 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3731 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3732 } else {
3733 /* RAM case */
3734 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3735 (addr & ~TARGET_PAGE_MASK);
3736 val = ldl_p(ptr);
3738 return val;
3741 /* warning: addr must be aligned */
3742 uint64_t ldq_phys(target_phys_addr_t addr)
3744 int io_index;
3745 uint8_t *ptr;
3746 uint64_t val;
3747 unsigned long pd;
3748 PhysPageDesc *p;
3750 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3751 if (!p) {
3752 pd = IO_MEM_UNASSIGNED;
3753 } else {
3754 pd = p->phys_offset;
3757 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3758 !(pd & IO_MEM_ROMD)) {
3759 /* I/O case */
3760 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3761 if (p)
3762 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3763 #ifdef TARGET_WORDS_BIGENDIAN
3764 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
3765 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
3766 #else
3767 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
3768 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
3769 #endif
3770 } else {
3771 /* RAM case */
3772 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3773 (addr & ~TARGET_PAGE_MASK);
3774 val = ldq_p(ptr);
3776 return val;
3779 /* XXX: optimize */
3780 uint32_t ldub_phys(target_phys_addr_t addr)
3782 uint8_t val;
3783 cpu_physical_memory_read(addr, &val, 1);
3784 return val;
3787 /* XXX: optimize */
3788 uint32_t lduw_phys(target_phys_addr_t addr)
3790 uint16_t val;
3791 cpu_physical_memory_read(addr, (uint8_t *)&val, 2);
3792 return tswap16(val);
3795 /* warning: addr must be aligned. The ram page is not masked as dirty
3796 and the code inside is not invalidated. It is useful if the dirty
3797 bits are used to track modified PTEs */
3798 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3800 int io_index;
3801 uint8_t *ptr;
3802 unsigned long pd;
3803 PhysPageDesc *p;
3805 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3806 if (!p) {
3807 pd = IO_MEM_UNASSIGNED;
3808 } else {
3809 pd = p->phys_offset;
3812 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3813 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3814 if (p)
3815 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3816 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3817 } else {
3818 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3819 ptr = qemu_get_ram_ptr(addr1);
3820 stl_p(ptr, val);
3822 if (unlikely(in_migration)) {
3823 if (!cpu_physical_memory_is_dirty(addr1)) {
3824 /* invalidate code */
3825 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3826 /* set dirty bit */
3827 cpu_physical_memory_set_dirty_flags(
3828 addr1, (0xff & ~CODE_DIRTY_FLAG));
3834 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3836 int io_index;
3837 uint8_t *ptr;
3838 unsigned long pd;
3839 PhysPageDesc *p;
3841 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3842 if (!p) {
3843 pd = IO_MEM_UNASSIGNED;
3844 } else {
3845 pd = p->phys_offset;
3848 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3849 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3850 if (p)
3851 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3852 #ifdef TARGET_WORDS_BIGENDIAN
3853 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
3854 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
3855 #else
3856 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3857 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
3858 #endif
3859 } else {
3860 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3861 (addr & ~TARGET_PAGE_MASK);
3862 stq_p(ptr, val);
3866 /* warning: addr must be aligned */
3867 void stl_phys(target_phys_addr_t addr, uint32_t val)
3869 int io_index;
3870 uint8_t *ptr;
3871 unsigned long pd;
3872 PhysPageDesc *p;
3874 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3875 if (!p) {
3876 pd = IO_MEM_UNASSIGNED;
3877 } else {
3878 pd = p->phys_offset;
3881 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3882 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3883 if (p)
3884 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3885 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
3886 } else {
3887 unsigned long addr1;
3888 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3889 /* RAM case */
3890 ptr = qemu_get_ram_ptr(addr1);
3891 stl_p(ptr, val);
3892 if (!cpu_physical_memory_is_dirty(addr1)) {
3893 /* invalidate code */
3894 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3895 /* set dirty bit */
3896 cpu_physical_memory_set_dirty_flags(addr1,
3897 (0xff & ~CODE_DIRTY_FLAG));
3902 /* XXX: optimize */
3903 void stb_phys(target_phys_addr_t addr, uint32_t val)
3905 uint8_t v = val;
3906 cpu_physical_memory_write(addr, &v, 1);
3909 /* XXX: optimize */
3910 void stw_phys(target_phys_addr_t addr, uint32_t val)
3912 uint16_t v = tswap16(val);
3913 cpu_physical_memory_write(addr, (const uint8_t *)&v, 2);
3916 /* XXX: optimize */
3917 void stq_phys(target_phys_addr_t addr, uint64_t val)
3919 val = tswap64(val);
3920 cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
3923 /* virtual memory access for debug (includes writing to ROM) */
3924 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3925 uint8_t *buf, int len, int is_write)
3927 int l;
3928 target_phys_addr_t phys_addr;
3929 target_ulong page;
3931 while (len > 0) {
3932 page = addr & TARGET_PAGE_MASK;
3933 phys_addr = cpu_get_phys_page_debug(env, page);
3934 /* if no physical page mapped, return an error */
3935 if (phys_addr == -1)
3936 return -1;
3937 l = (page + TARGET_PAGE_SIZE) - addr;
3938 if (l > len)
3939 l = len;
3940 phys_addr += (addr & ~TARGET_PAGE_MASK);
3941 if (is_write)
3942 cpu_physical_memory_write_rom(phys_addr, buf, l);
3943 else
3944 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
3945 len -= l;
3946 buf += l;
3947 addr += l;
3949 return 0;
3951 #endif
3953 /* in deterministic execution mode, instructions doing device I/Os
3954 must be at the end of the TB */
3955 void cpu_io_recompile(CPUState *env, void *retaddr)
3957 TranslationBlock *tb;
3958 uint32_t n, cflags;
3959 target_ulong pc, cs_base;
3960 uint64_t flags;
3962 tb = tb_find_pc((unsigned long)retaddr);
3963 if (!tb) {
3964 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
3965 retaddr);
3967 n = env->icount_decr.u16.low + tb->icount;
3968 cpu_restore_state(tb, env, (unsigned long)retaddr, NULL);
3969 /* Calculate how many instructions had been executed before the fault
3970 occurred. */
3971 n = n - env->icount_decr.u16.low;
3972 /* Generate a new TB ending on the I/O insn. */
3973 n++;
3974 /* On MIPS and SH, delay slot instructions can only be restarted if
3975 they were already the first instruction in the TB. If this is not
3976 the first instruction in a TB then re-execute the preceding
3977 branch. */
3978 #if defined(TARGET_MIPS)
3979 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
3980 env->active_tc.PC -= 4;
3981 env->icount_decr.u16.low++;
3982 env->hflags &= ~MIPS_HFLAG_BMASK;
3984 #elif defined(TARGET_SH4)
3985 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
3986 && n > 1) {
3987 env->pc -= 2;
3988 env->icount_decr.u16.low++;
3989 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
3991 #endif
3992 /* This should never happen. */
3993 if (n > CF_COUNT_MASK)
3994 cpu_abort(env, "TB too big during recompile");
3996 cflags = n | CF_LAST_IO;
3997 pc = tb->pc;
3998 cs_base = tb->cs_base;
3999 flags = tb->flags;
4000 tb_phys_invalidate(tb, -1);
4001 /* FIXME: In theory this could raise an exception. In practice
4002 we have already translated the block once so it's probably ok. */
4003 tb_gen_code(env, pc, cs_base, flags, cflags);
4004 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4005 the first in the TB) then we end up generating a whole new TB and
4006 repeating the fault, which is horribly inefficient.
4007 Better would be to execute just this insn uncached, or generate a
4008 second new TB. */
4009 cpu_resume_from_signal(env, NULL);
4012 #if !defined(CONFIG_USER_ONLY)
4014 void dump_exec_info(FILE *f,
4015 int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
4017 int i, target_code_size, max_target_code_size;
4018 int direct_jmp_count, direct_jmp2_count, cross_page;
4019 TranslationBlock *tb;
4021 target_code_size = 0;
4022 max_target_code_size = 0;
4023 cross_page = 0;
4024 direct_jmp_count = 0;
4025 direct_jmp2_count = 0;
4026 for(i = 0; i < nb_tbs; i++) {
4027 tb = &tbs[i];
4028 target_code_size += tb->size;
4029 if (tb->size > max_target_code_size)
4030 max_target_code_size = tb->size;
4031 if (tb->page_addr[1] != -1)
4032 cross_page++;
4033 if (tb->tb_next_offset[0] != 0xffff) {
4034 direct_jmp_count++;
4035 if (tb->tb_next_offset[1] != 0xffff) {
4036 direct_jmp2_count++;
4040 /* XXX: avoid using doubles ? */
4041 cpu_fprintf(f, "Translation buffer state:\n");
4042 cpu_fprintf(f, "gen code size %ld/%ld\n",
4043 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4044 cpu_fprintf(f, "TB count %d/%d\n",
4045 nb_tbs, code_gen_max_blocks);
4046 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4047 nb_tbs ? target_code_size / nb_tbs : 0,
4048 max_target_code_size);
4049 cpu_fprintf(f, "TB avg host size %d bytes (expansion ratio: %0.1f)\n",
4050 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4051 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4052 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4053 cross_page,
4054 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4055 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4056 direct_jmp_count,
4057 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4058 direct_jmp2_count,
4059 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4060 cpu_fprintf(f, "\nStatistics:\n");
4061 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4062 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4063 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4064 tcg_dump_info(f, cpu_fprintf);
4067 #define MMUSUFFIX _cmmu
4068 #define GETPC() NULL
4069 #define env cpu_single_env
4070 #define SOFTMMU_CODE_ACCESS
4072 #define SHIFT 0
4073 #include "softmmu_template.h"
4075 #define SHIFT 1
4076 #include "softmmu_template.h"
4078 #define SHIFT 2
4079 #include "softmmu_template.h"
4081 #define SHIFT 3
4082 #include "softmmu_template.h"
4084 #undef env
4086 #endif