memory: unify the two branches of cpu_register_physical_memory_log()
[qemu/kevin.git] / exec.c
blob 6232a39224ff6091e21ef8ad89ea3e03c910ceb6
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump.  ARM and Sparc64
91    have limited branch ranges (possibly also PPC), so place it in a
92    section close to the code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
122 static MemoryRegion io_mem_subpage_ram;
124 #endif
126 CPUState *first_cpu;
127 /* current CPU in the current thread. It is only valid inside
128 cpu_exec() */
129 DEFINE_TLS(CPUState *,cpu_single_env);
130 /* 0 = Do not count executed instructions.
131 1 = Precise instruction counting.
132 2 = Adaptive rate instruction counting. */
133 int use_icount = 0;
135 typedef struct PageDesc {
136 /* list of TBs intersecting this ram page */
137 TranslationBlock *first_tb;
138     /* in order to optimize self-modifying code, we count the number of
139        write accesses to a given page and switch to a bitmap past a threshold */
140 unsigned int code_write_count;
141 uint8_t *code_bitmap;
142 #if defined(CONFIG_USER_ONLY)
143 unsigned long flags;
144 #endif
145 } PageDesc;
147 /* In system mode we want L1_MAP to be based on ram offsets,
148 while in user mode we want it to be based on virtual addresses. */
149 #if !defined(CONFIG_USER_ONLY)
150 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
151 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
154 #endif
155 #else
156 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
157 #endif
159 /* Size of the L2 (and L3, etc) page tables. */
160 #define L2_BITS 10
161 #define L2_SIZE (1 << L2_BITS)
163 #define P_L2_LEVELS \
164 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
166 /* The bits remaining after N lower levels of page tables. */
167 #define V_L1_BITS_REM \
168 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
178 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
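/* Worked example (the exact values depend on host and target
   configuration): if L1_MAP_ADDR_SPACE_BITS ends up as 64 and
   TARGET_PAGE_BITS is 12, then 52 bits of page index remain.
   52 % 10 gives V_L1_BITS_REM = 2, which is below 4, so the remainder
   is folded into the first level: V_L1_BITS = 12, V_L1_SIZE = 4096
   entries and V_L1_SHIFT = 40, leaving four 10-bit levels (three
   intermediate levels plus the PageDesc leaf arrays) for the walk in
   page_find_alloc(). */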
180 unsigned long qemu_real_host_page_size;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageDesc {
190 /* offset in host memory of the page + io_index in the low bits */
191 ram_addr_t phys_offset;
192 ram_addr_t region_offset;
193 } PhysPageDesc;
195 typedef struct PhysPageEntry PhysPageEntry;
197 static MemoryRegionSection *phys_sections;
198 static unsigned phys_sections_nb, phys_sections_nb_alloc;
199 static uint16_t phys_section_unassigned;
201 struct PhysPageEntry {
202 union {
203 uint16_t leaf; /* index into phys_sections */
204 uint16_t node; /* index into phys_map_nodes */
205 } u;
208 /* Simple allocator for PhysPageEntry nodes */
209 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
210 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
212 #define PHYS_MAP_NODE_NIL ((uint16_t)~0)
214 /* This is a multi-level map on the physical address space.
215    The bottom level holds indices into phys_sections. */
216 static PhysPageEntry phys_map = { .u.node = PHYS_MAP_NODE_NIL };
218 static void io_mem_init(void);
219 static void memory_map_init(void);
221 /* io memory support */
222 MemoryRegion *io_mem_region[IO_MEM_NB_ENTRIES];
223 static char io_mem_used[IO_MEM_NB_ENTRIES];
224 static MemoryRegion io_mem_watch;
225 #endif
227 /* log support */
228 #ifdef WIN32
229 static const char *logfilename = "qemu.log";
230 #else
231 static const char *logfilename = "/tmp/qemu.log";
232 #endif
233 FILE *logfile;
234 int loglevel;
235 static int log_append = 0;
237 /* statistics */
238 #if !defined(CONFIG_USER_ONLY)
239 static int tlb_flush_count;
240 #endif
241 static int tb_flush_count;
242 static int tb_phys_invalidate_count;
244 #ifdef _WIN32
245 static void map_exec(void *addr, long size)
247 DWORD old_protect;
248 VirtualProtect(addr, size,
249 PAGE_EXECUTE_READWRITE, &old_protect);
252 #else
253 static void map_exec(void *addr, long size)
255 unsigned long start, end, page_size;
257 page_size = getpagesize();
258 start = (unsigned long)addr;
259 start &= ~(page_size - 1);
261 end = (unsigned long)addr + size;
262 end += page_size - 1;
263 end &= ~(page_size - 1);
265 mprotect((void *)start, end - start,
266 PROT_READ | PROT_WRITE | PROT_EXEC);
268 #endif
270 static void page_init(void)
272 /* NOTE: we can always suppose that qemu_host_page_size >=
273 TARGET_PAGE_SIZE */
274 #ifdef _WIN32
276 SYSTEM_INFO system_info;
278 GetSystemInfo(&system_info);
279 qemu_real_host_page_size = system_info.dwPageSize;
281 #else
282 qemu_real_host_page_size = getpagesize();
283 #endif
284 if (qemu_host_page_size == 0)
285 qemu_host_page_size = qemu_real_host_page_size;
286 if (qemu_host_page_size < TARGET_PAGE_SIZE)
287 qemu_host_page_size = TARGET_PAGE_SIZE;
288 qemu_host_page_mask = ~(qemu_host_page_size - 1);
290 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
292 #ifdef HAVE_KINFO_GETVMMAP
293 struct kinfo_vmentry *freep;
294 int i, cnt;
296 freep = kinfo_getvmmap(getpid(), &cnt);
297 if (freep) {
298 mmap_lock();
299 for (i = 0; i < cnt; i++) {
300 unsigned long startaddr, endaddr;
302 startaddr = freep[i].kve_start;
303 endaddr = freep[i].kve_end;
304 if (h2g_valid(startaddr)) {
305 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
307 if (h2g_valid(endaddr)) {
308 endaddr = h2g(endaddr);
309 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
310 } else {
311 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
312 endaddr = ~0ul;
313 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
314 #endif
318 free(freep);
319 mmap_unlock();
321 #else
322 FILE *f;
324 last_brk = (unsigned long)sbrk(0);
326 f = fopen("/compat/linux/proc/self/maps", "r");
327 if (f) {
328 mmap_lock();
330 do {
331 unsigned long startaddr, endaddr;
332 int n;
334 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
336 if (n == 2 && h2g_valid(startaddr)) {
337 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
339 if (h2g_valid(endaddr)) {
340 endaddr = h2g(endaddr);
341 } else {
342 endaddr = ~0ul;
344 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
346 } while (!feof(f));
348 fclose(f);
349 mmap_unlock();
351 #endif
353 #endif
356 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
358 PageDesc *pd;
359 void **lp;
360 int i;
362 #if defined(CONFIG_USER_ONLY)
363 /* We can't use g_malloc because it may recurse into a locked mutex. */
364 # define ALLOC(P, SIZE) \
365 do { \
366 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
367 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
368 } while (0)
369 #else
370 # define ALLOC(P, SIZE) \
371 do { P = g_malloc0(SIZE); } while (0)
372 #endif
374 /* Level 1. Always allocated. */
375 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
377 /* Level 2..N-1. */
378 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
379 void **p = *lp;
381 if (p == NULL) {
382 if (!alloc) {
383 return NULL;
385 ALLOC(p, sizeof(void *) * L2_SIZE);
386 *lp = p;
389 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
392 pd = *lp;
393 if (pd == NULL) {
394 if (!alloc) {
395 return NULL;
397 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
398 *lp = pd;
401 #undef ALLOC
403 return pd + (index & (L2_SIZE - 1));
406 static inline PageDesc *page_find(tb_page_addr_t index)
408 return page_find_alloc(index, 0);
411 #if !defined(CONFIG_USER_ONLY)
413 static PhysPageEntry *phys_map_node_alloc(uint16_t *ptr)
415 unsigned i;
416 uint16_t ret;
418 /* Assign early to avoid the pointer being invalidated by g_renew() */
419 *ptr = ret = phys_map_nodes_nb++;
420 assert(ret != PHYS_MAP_NODE_NIL);
421 if (ret == phys_map_nodes_nb_alloc) {
422 typedef PhysPageEntry Node[L2_SIZE];
423 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
424 phys_map_nodes = g_renew(Node, phys_map_nodes,
425 phys_map_nodes_nb_alloc);
427 for (i = 0; i < L2_SIZE; ++i) {
428 phys_map_nodes[ret][i].u.node = PHYS_MAP_NODE_NIL;
430 return phys_map_nodes[ret];
433 static void phys_map_nodes_reset(void)
435 phys_map_nodes_nb = 0;
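/* The physical map is a radix tree whose interior nodes live in the
   flat phys_map_nodes array and are referenced by 16-bit indices
   rather than pointers.  This keeps each PhysPageEntry small, survives
   reallocation of the array by g_renew(), and lets the whole tree be
   thrown away cheaply by resetting phys_map_nodes_nb to zero. */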
438 static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
440 PhysPageEntry *lp, *p;
441 int i, j;
443 lp = &phys_map;
445 /* Level 1..N. */
446 for (i = P_L2_LEVELS - 1; i >= 0; i--) {
447 if (lp->u.node == PHYS_MAP_NODE_NIL) {
448 if (!alloc) {
449 return NULL;
451 p = phys_map_node_alloc(&lp->u.node);
452 if (i == 0) {
453 for (j = 0; j < L2_SIZE; j++) {
454 p[j].u.leaf = phys_section_unassigned;
457 } else {
458 p = phys_map_nodes[lp->u.node];
460 lp = &p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
463 return &lp->u.leaf;
466 static inline PhysPageDesc phys_page_find(target_phys_addr_t index)
468 uint16_t *p = phys_page_find_alloc(index, 0);
469 uint16_t s_index = phys_section_unassigned;
470 MemoryRegionSection *section;
471 PhysPageDesc pd;
473 if (p) {
474 s_index = *p;
476 section = &phys_sections[s_index];
477 index <<= TARGET_PAGE_BITS;
478 assert(section->offset_within_address_space <= index
479 && index <= section->offset_within_address_space + section->size-1);
480 pd.phys_offset = section->mr->ram_addr;
481 pd.region_offset = (index - section->offset_within_address_space)
482 + section->offset_within_region;
483 if (memory_region_is_ram(section->mr)) {
484 pd.phys_offset += pd.region_offset;
485 pd.region_offset = 0;
486 } else if (section->mr->rom_device) {
487 pd.phys_offset += pd.region_offset;
489 if (section->readonly) {
490 pd.phys_offset |= io_mem_rom.ram_addr;
492 return pd;
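/* For RAM and ROM-device sections the offset within the section is
   folded into phys_offset above, so callers get an address they can
   hand straight to the ram accessors; for other (MMIO) sections the
   offset is kept in region_offset, which is what the I/O read/write
   callbacks expect to receive. */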
495 static void tlb_protect_code(ram_addr_t ram_addr);
496 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
497 target_ulong vaddr);
498 #define mmap_lock() do { } while(0)
499 #define mmap_unlock() do { } while(0)
500 #endif
502 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
504 #if defined(CONFIG_USER_ONLY)
505 /* Currently it is not recommended to allocate big chunks of data in
506    user mode. This will change once a dedicated libc is used. */
507 #define USE_STATIC_CODE_GEN_BUFFER
508 #endif
510 #ifdef USE_STATIC_CODE_GEN_BUFFER
511 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
512 __attribute__((aligned (CODE_GEN_ALIGN)));
513 #endif
515 static void code_gen_alloc(unsigned long tb_size)
517 #ifdef USE_STATIC_CODE_GEN_BUFFER
518 code_gen_buffer = static_code_gen_buffer;
519 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
520 map_exec(code_gen_buffer, code_gen_buffer_size);
521 #else
522 code_gen_buffer_size = tb_size;
523 if (code_gen_buffer_size == 0) {
524 #if defined(CONFIG_USER_ONLY)
525 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
526 #else
527 /* XXX: needs adjustments */
528 code_gen_buffer_size = (unsigned long)(ram_size / 4);
529 #endif
531 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
532 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
533 /* The code gen buffer location may have constraints depending on
534 the host cpu and OS */
535 #if defined(__linux__)
537 int flags;
538 void *start = NULL;
540 flags = MAP_PRIVATE | MAP_ANONYMOUS;
541 #if defined(__x86_64__)
542 flags |= MAP_32BIT;
543 /* Cannot map more than that */
544 if (code_gen_buffer_size > (800 * 1024 * 1024))
545 code_gen_buffer_size = (800 * 1024 * 1024);
546 #elif defined(__sparc_v9__)
547 // Map the buffer below 2G, so we can use direct calls and branches
548 flags |= MAP_FIXED;
549 start = (void *) 0x60000000UL;
550 if (code_gen_buffer_size > (512 * 1024 * 1024))
551 code_gen_buffer_size = (512 * 1024 * 1024);
552 #elif defined(__arm__)
553     /* Keep the buffer no bigger than 16MB so that direct branches between blocks stay in range */
554 if (code_gen_buffer_size > 16 * 1024 * 1024)
555 code_gen_buffer_size = 16 * 1024 * 1024;
556 #elif defined(__s390x__)
557 /* Map the buffer so that we can use direct calls and branches. */
558 /* We have a +- 4GB range on the branches; leave some slop. */
559 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
560 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
562 start = (void *)0x90000000UL;
563 #endif
564 code_gen_buffer = mmap(start, code_gen_buffer_size,
565 PROT_WRITE | PROT_READ | PROT_EXEC,
566 flags, -1, 0);
567 if (code_gen_buffer == MAP_FAILED) {
568 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
569 exit(1);
572 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
573 || defined(__DragonFly__) || defined(__OpenBSD__) \
574 || defined(__NetBSD__)
576 int flags;
577 void *addr = NULL;
578 flags = MAP_PRIVATE | MAP_ANONYMOUS;
579 #if defined(__x86_64__)
580 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
581 * 0x40000000 is free */
582 flags |= MAP_FIXED;
583 addr = (void *)0x40000000;
584 /* Cannot map more than that */
585 if (code_gen_buffer_size > (800 * 1024 * 1024))
586 code_gen_buffer_size = (800 * 1024 * 1024);
587 #elif defined(__sparc_v9__)
588 // Map the buffer below 2G, so we can use direct calls and branches
589 flags |= MAP_FIXED;
590 addr = (void *) 0x60000000UL;
591 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
592 code_gen_buffer_size = (512 * 1024 * 1024);
594 #endif
595 code_gen_buffer = mmap(addr, code_gen_buffer_size,
596 PROT_WRITE | PROT_READ | PROT_EXEC,
597 flags, -1, 0);
598 if (code_gen_buffer == MAP_FAILED) {
599 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
600 exit(1);
603 #else
604 code_gen_buffer = g_malloc(code_gen_buffer_size);
605 map_exec(code_gen_buffer, code_gen_buffer_size);
606 #endif
607 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
608 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
609 code_gen_buffer_max_size = code_gen_buffer_size -
610 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
611 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
612 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
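/* code_gen_buffer_max_size is a fill threshold rather than the true end
   of the buffer: it leaves roughly one worst-case TB of slack
   (TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes) so the block currently being
   translated has room to finish; tb_alloc() refuses to start a new TB
   once code_gen_ptr has crossed the threshold. */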
615 /* Must be called before using the QEMU cpus. 'tb_size' is the size
616 (in bytes) allocated to the translation buffer. Zero means default
617 size. */
618 void tcg_exec_init(unsigned long tb_size)
620 cpu_gen_init();
621 code_gen_alloc(tb_size);
622 code_gen_ptr = code_gen_buffer;
623 page_init();
624 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
625 /* There's no guest base to take into account, so go ahead and
626 initialize the prologue now. */
627 tcg_prologue_init(&tcg_ctx);
628 #endif
631 bool tcg_enabled(void)
633 return code_gen_buffer != NULL;
636 void cpu_exec_init_all(void)
638 #if !defined(CONFIG_USER_ONLY)
639 memory_map_init();
640 io_mem_init();
641 #endif
644 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
646 static int cpu_common_post_load(void *opaque, int version_id)
648 CPUState *env = opaque;
650 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
651 version_id is increased. */
652 env->interrupt_request &= ~0x01;
653 tlb_flush(env, 1);
655 return 0;
658 static const VMStateDescription vmstate_cpu_common = {
659 .name = "cpu_common",
660 .version_id = 1,
661 .minimum_version_id = 1,
662 .minimum_version_id_old = 1,
663 .post_load = cpu_common_post_load,
664 .fields = (VMStateField []) {
665 VMSTATE_UINT32(halted, CPUState),
666 VMSTATE_UINT32(interrupt_request, CPUState),
667 VMSTATE_END_OF_LIST()
670 #endif
672 CPUState *qemu_get_cpu(int cpu)
674 CPUState *env = first_cpu;
676 while (env) {
677 if (env->cpu_index == cpu)
678 break;
679 env = env->next_cpu;
682 return env;
685 void cpu_exec_init(CPUState *env)
687 CPUState **penv;
688 int cpu_index;
690 #if defined(CONFIG_USER_ONLY)
691 cpu_list_lock();
692 #endif
693 env->next_cpu = NULL;
694 penv = &first_cpu;
695 cpu_index = 0;
696 while (*penv != NULL) {
697 penv = &(*penv)->next_cpu;
698 cpu_index++;
700 env->cpu_index = cpu_index;
701 env->numa_node = 0;
702 QTAILQ_INIT(&env->breakpoints);
703 QTAILQ_INIT(&env->watchpoints);
704 #ifndef CONFIG_USER_ONLY
705 env->thread_id = qemu_get_thread_id();
706 #endif
707 *penv = env;
708 #if defined(CONFIG_USER_ONLY)
709 cpu_list_unlock();
710 #endif
711 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
712 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
713 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
714 cpu_save, cpu_load, env);
715 #endif
718 /* Allocate a new translation block. Flush the translation buffer if
719 too many translation blocks or too much generated code. */
720 static TranslationBlock *tb_alloc(target_ulong pc)
722 TranslationBlock *tb;
724 if (nb_tbs >= code_gen_max_blocks ||
725 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
726 return NULL;
727 tb = &tbs[nb_tbs++];
728 tb->pc = pc;
729 tb->cflags = 0;
730 return tb;
733 void tb_free(TranslationBlock *tb)
735     /* In practice this is mostly used for single-use temporary TBs.
736        Ignore the hard cases and just back up if this TB happens to
737        be the last one generated. */
738 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
739 code_gen_ptr = tb->tc_ptr;
740 nb_tbs--;
744 static inline void invalidate_page_bitmap(PageDesc *p)
746 if (p->code_bitmap) {
747 g_free(p->code_bitmap);
748 p->code_bitmap = NULL;
750 p->code_write_count = 0;
753 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
755 static void page_flush_tb_1 (int level, void **lp)
757 int i;
759 if (*lp == NULL) {
760 return;
762 if (level == 0) {
763 PageDesc *pd = *lp;
764 for (i = 0; i < L2_SIZE; ++i) {
765 pd[i].first_tb = NULL;
766 invalidate_page_bitmap(pd + i);
768 } else {
769 void **pp = *lp;
770 for (i = 0; i < L2_SIZE; ++i) {
771 page_flush_tb_1 (level - 1, pp + i);
776 static void page_flush_tb(void)
778 int i;
779 for (i = 0; i < V_L1_SIZE; i++) {
780 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
784 /* flush all the translation blocks */
785 /* XXX: tb_flush is currently not thread safe */
786 void tb_flush(CPUState *env1)
788 CPUState *env;
789 #if defined(DEBUG_FLUSH)
790 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
791 (unsigned long)(code_gen_ptr - code_gen_buffer),
792 nb_tbs, nb_tbs > 0 ?
793 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
794 #endif
795 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
796 cpu_abort(env1, "Internal error: code buffer overflow\n");
798 nb_tbs = 0;
800 for(env = first_cpu; env != NULL; env = env->next_cpu) {
801 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
804 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
805 page_flush_tb();
807 code_gen_ptr = code_gen_buffer;
808 /* XXX: flush processor icache at this point if cache flush is
809 expensive */
810 tb_flush_count++;
813 #ifdef DEBUG_TB_CHECK
815 static void tb_invalidate_check(target_ulong address)
817 TranslationBlock *tb;
818 int i;
819 address &= TARGET_PAGE_MASK;
820 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
821 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
822 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
823 address >= tb->pc + tb->size)) {
824 printf("ERROR invalidate: address=" TARGET_FMT_lx
825 " PC=%08lx size=%04x\n",
826 address, (long)tb->pc, tb->size);
832 /* verify that all the pages have correct rights for code */
833 static void tb_page_check(void)
835 TranslationBlock *tb;
836 int i, flags1, flags2;
838 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
839 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
840 flags1 = page_get_flags(tb->pc);
841 flags2 = page_get_flags(tb->pc + tb->size - 1);
842 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
843 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
844 (long)tb->pc, tb->size, flags1, flags2);
850 #endif
852 /* invalidate one TB */
853 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
854 int next_offset)
856 TranslationBlock *tb1;
857 for(;;) {
858 tb1 = *ptb;
859 if (tb1 == tb) {
860 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
861 break;
863 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
867 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
869 TranslationBlock *tb1;
870 unsigned int n1;
872 for(;;) {
873 tb1 = *ptb;
874 n1 = (long)tb1 & 3;
875 tb1 = (TranslationBlock *)((long)tb1 & ~3);
876 if (tb1 == tb) {
877 *ptb = tb1->page_next[n1];
878 break;
880 ptb = &tb1->page_next[n1];
884 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
886 TranslationBlock *tb1, **ptb;
887 unsigned int n1;
889 ptb = &tb->jmp_next[n];
890 tb1 = *ptb;
891 if (tb1) {
892 /* find tb(n) in circular list */
893 for(;;) {
894 tb1 = *ptb;
895 n1 = (long)tb1 & 3;
896 tb1 = (TranslationBlock *)((long)tb1 & ~3);
897 if (n1 == n && tb1 == tb)
898 break;
899 if (n1 == 2) {
900 ptb = &tb1->jmp_first;
901 } else {
902 ptb = &tb1->jmp_next[n1];
905         /* now we can remove tb(n) from the list */
906 *ptb = tb->jmp_next[n];
908 tb->jmp_next[n] = NULL;
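/* The jump lists use tagged pointers: the low two bits of each
   TranslationBlock pointer stored in jmp_next[]/jmp_first encode which
   jump slot of that TB continues the list, and the value 2 marks the
   list head (the owning TB itself).  That is why the walks above mask
   with ~3 and stop when n1 == 2. */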
912 /* reset the jump entry 'n' of a TB so that it is not chained to
913 another TB */
914 static inline void tb_reset_jump(TranslationBlock *tb, int n)
916 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
919 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
921 CPUState *env;
922 PageDesc *p;
923 unsigned int h, n1;
924 tb_page_addr_t phys_pc;
925 TranslationBlock *tb1, *tb2;
927 /* remove the TB from the hash list */
928 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
929 h = tb_phys_hash_func(phys_pc);
930 tb_remove(&tb_phys_hash[h], tb,
931 offsetof(TranslationBlock, phys_hash_next));
933 /* remove the TB from the page list */
934 if (tb->page_addr[0] != page_addr) {
935 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
936 tb_page_remove(&p->first_tb, tb);
937 invalidate_page_bitmap(p);
939 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
940 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
941 tb_page_remove(&p->first_tb, tb);
942 invalidate_page_bitmap(p);
945 tb_invalidated_flag = 1;
947     /* remove the TB from each CPU's jump cache */
948 h = tb_jmp_cache_hash_func(tb->pc);
949 for(env = first_cpu; env != NULL; env = env->next_cpu) {
950 if (env->tb_jmp_cache[h] == tb)
951 env->tb_jmp_cache[h] = NULL;
954     /* remove this TB from the two jump lists */
955 tb_jmp_remove(tb, 0);
956 tb_jmp_remove(tb, 1);
958 /* suppress any remaining jumps to this TB */
959 tb1 = tb->jmp_first;
960 for(;;) {
961 n1 = (long)tb1 & 3;
962 if (n1 == 2)
963 break;
964 tb1 = (TranslationBlock *)((long)tb1 & ~3);
965 tb2 = tb1->jmp_next[n1];
966 tb_reset_jump(tb1, n1);
967 tb1->jmp_next[n1] = NULL;
968 tb1 = tb2;
970 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
972 tb_phys_invalidate_count++;
975 static inline void set_bits(uint8_t *tab, int start, int len)
977 int end, mask, end1;
979 end = start + len;
980 tab += start >> 3;
981 mask = 0xff << (start & 7);
982 if ((start & ~7) == (end & ~7)) {
983 if (start < end) {
984 mask &= ~(0xff << (end & 7));
985 *tab |= mask;
987 } else {
988 *tab++ |= mask;
989 start = (start + 8) & ~7;
990 end1 = end & ~7;
991 while (start < end1) {
992 *tab++ = 0xff;
993 start += 8;
995 if (start < end) {
996 mask = ~(0xff << (end & 7));
997 *tab |= mask;
1002 static void build_page_bitmap(PageDesc *p)
1004 int n, tb_start, tb_end;
1005 TranslationBlock *tb;
1007 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1009 tb = p->first_tb;
1010 while (tb != NULL) {
1011 n = (long)tb & 3;
1012 tb = (TranslationBlock *)((long)tb & ~3);
1013 /* NOTE: this is subtle as a TB may span two physical pages */
1014 if (n == 0) {
1015 /* NOTE: tb_end may be after the end of the page, but
1016 it is not a problem */
1017 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1018 tb_end = tb_start + tb->size;
1019 if (tb_end > TARGET_PAGE_SIZE)
1020 tb_end = TARGET_PAGE_SIZE;
1021 } else {
1022 tb_start = 0;
1023 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1025 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1026 tb = tb->page_next[n];
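/* The code bitmap has one bit per byte of the guest page; a set bit
   means the byte is covered by some TB's code.  tb_invalidate_phys_page_fast()
   consults it so that small guest writes that do not touch translated
   code can avoid the full tb_invalidate_phys_page_range() path. */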
1030 TranslationBlock *tb_gen_code(CPUState *env,
1031 target_ulong pc, target_ulong cs_base,
1032 int flags, int cflags)
1034 TranslationBlock *tb;
1035 uint8_t *tc_ptr;
1036 tb_page_addr_t phys_pc, phys_page2;
1037 target_ulong virt_page2;
1038 int code_gen_size;
1040 phys_pc = get_page_addr_code(env, pc);
1041 tb = tb_alloc(pc);
1042 if (!tb) {
1043 /* flush must be done */
1044 tb_flush(env);
1045 /* cannot fail at this point */
1046 tb = tb_alloc(pc);
1047 /* Don't forget to invalidate previous TB info. */
1048 tb_invalidated_flag = 1;
1050 tc_ptr = code_gen_ptr;
1051 tb->tc_ptr = tc_ptr;
1052 tb->cs_base = cs_base;
1053 tb->flags = flags;
1054 tb->cflags = cflags;
1055 cpu_gen_code(env, tb, &code_gen_size);
1056 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1058 /* check next page if needed */
1059 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1060 phys_page2 = -1;
1061 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1062 phys_page2 = get_page_addr_code(env, virt_page2);
1064 tb_link_page(tb, phys_pc, phys_page2);
1065 return tb;
1068 /* invalidate all TBs which intersect with the target physical page
1069    in the range [start, end). NOTE: start and end must refer to
1070    the same physical page. 'is_cpu_write_access' should be true if called
1071    from a real cpu write access: the virtual CPU will exit the current
1072    TB if code is modified inside this TB. */
1073 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1074 int is_cpu_write_access)
1076 TranslationBlock *tb, *tb_next, *saved_tb;
1077 CPUState *env = cpu_single_env;
1078 tb_page_addr_t tb_start, tb_end;
1079 PageDesc *p;
1080 int n;
1081 #ifdef TARGET_HAS_PRECISE_SMC
1082 int current_tb_not_found = is_cpu_write_access;
1083 TranslationBlock *current_tb = NULL;
1084 int current_tb_modified = 0;
1085 target_ulong current_pc = 0;
1086 target_ulong current_cs_base = 0;
1087 int current_flags = 0;
1088 #endif /* TARGET_HAS_PRECISE_SMC */
1090 p = page_find(start >> TARGET_PAGE_BITS);
1091 if (!p)
1092 return;
1093 if (!p->code_bitmap &&
1094 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1095 is_cpu_write_access) {
1096 /* build code bitmap */
1097 build_page_bitmap(p);
1100     /* we remove all the TBs in the range [start, end) */
1101 /* XXX: see if in some cases it could be faster to invalidate all the code */
1102 tb = p->first_tb;
1103 while (tb != NULL) {
1104 n = (long)tb & 3;
1105 tb = (TranslationBlock *)((long)tb & ~3);
1106 tb_next = tb->page_next[n];
1107 /* NOTE: this is subtle as a TB may span two physical pages */
1108 if (n == 0) {
1109 /* NOTE: tb_end may be after the end of the page, but
1110 it is not a problem */
1111 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1112 tb_end = tb_start + tb->size;
1113 } else {
1114 tb_start = tb->page_addr[1];
1115 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1117 if (!(tb_end <= start || tb_start >= end)) {
1118 #ifdef TARGET_HAS_PRECISE_SMC
1119 if (current_tb_not_found) {
1120 current_tb_not_found = 0;
1121 current_tb = NULL;
1122 if (env->mem_io_pc) {
1123 /* now we have a real cpu fault */
1124 current_tb = tb_find_pc(env->mem_io_pc);
1127 if (current_tb == tb &&
1128 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1129 /* If we are modifying the current TB, we must stop
1130 its execution. We could be more precise by checking
1131 that the modification is after the current PC, but it
1132 would require a specialized function to partially
1133 restore the CPU state */
1135 current_tb_modified = 1;
1136 cpu_restore_state(current_tb, env, env->mem_io_pc);
1137 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1138 &current_flags);
1140 #endif /* TARGET_HAS_PRECISE_SMC */
1141 /* we need to do that to handle the case where a signal
1142 occurs while doing tb_phys_invalidate() */
1143 saved_tb = NULL;
1144 if (env) {
1145 saved_tb = env->current_tb;
1146 env->current_tb = NULL;
1148 tb_phys_invalidate(tb, -1);
1149 if (env) {
1150 env->current_tb = saved_tb;
1151 if (env->interrupt_request && env->current_tb)
1152 cpu_interrupt(env, env->interrupt_request);
1155 tb = tb_next;
1157 #if !defined(CONFIG_USER_ONLY)
1158 /* if no code remaining, no need to continue to use slow writes */
1159 if (!p->first_tb) {
1160 invalidate_page_bitmap(p);
1161 if (is_cpu_write_access) {
1162 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1165 #endif
1166 #ifdef TARGET_HAS_PRECISE_SMC
1167 if (current_tb_modified) {
1168 /* we generate a block containing just the instruction
1169 modifying the memory. It will ensure that it cannot modify
1170 itself */
1171 env->current_tb = NULL;
1172 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1173 cpu_resume_from_signal(env, NULL);
1175 #endif
1178 /* len must be <= 8 and start must be a multiple of len */
1179 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1181 PageDesc *p;
1182 int offset, b;
1183 #if 0
1184 if (1) {
1185 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1186 cpu_single_env->mem_io_vaddr, len,
1187 cpu_single_env->eip,
1188 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1190 #endif
1191 p = page_find(start >> TARGET_PAGE_BITS);
1192 if (!p)
1193 return;
1194 if (p->code_bitmap) {
1195 offset = start & ~TARGET_PAGE_MASK;
1196 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1197 if (b & ((1 << len) - 1))
1198 goto do_invalidate;
1199 } else {
1200 do_invalidate:
1201 tb_invalidate_phys_page_range(start, start + len, 1);
1205 #if !defined(CONFIG_SOFTMMU)
1206 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1207 unsigned long pc, void *puc)
1209 TranslationBlock *tb;
1210 PageDesc *p;
1211 int n;
1212 #ifdef TARGET_HAS_PRECISE_SMC
1213 TranslationBlock *current_tb = NULL;
1214 CPUState *env = cpu_single_env;
1215 int current_tb_modified = 0;
1216 target_ulong current_pc = 0;
1217 target_ulong current_cs_base = 0;
1218 int current_flags = 0;
1219 #endif
1221 addr &= TARGET_PAGE_MASK;
1222 p = page_find(addr >> TARGET_PAGE_BITS);
1223 if (!p)
1224 return;
1225 tb = p->first_tb;
1226 #ifdef TARGET_HAS_PRECISE_SMC
1227 if (tb && pc != 0) {
1228 current_tb = tb_find_pc(pc);
1230 #endif
1231 while (tb != NULL) {
1232 n = (long)tb & 3;
1233 tb = (TranslationBlock *)((long)tb & ~3);
1234 #ifdef TARGET_HAS_PRECISE_SMC
1235 if (current_tb == tb &&
1236 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1237 /* If we are modifying the current TB, we must stop
1238 its execution. We could be more precise by checking
1239 that the modification is after the current PC, but it
1240 would require a specialized function to partially
1241 restore the CPU state */
1243 current_tb_modified = 1;
1244 cpu_restore_state(current_tb, env, pc);
1245 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1246 &current_flags);
1248 #endif /* TARGET_HAS_PRECISE_SMC */
1249 tb_phys_invalidate(tb, addr);
1250 tb = tb->page_next[n];
1252 p->first_tb = NULL;
1253 #ifdef TARGET_HAS_PRECISE_SMC
1254 if (current_tb_modified) {
1255 /* we generate a block containing just the instruction
1256 modifying the memory. It will ensure that it cannot modify
1257 itself */
1258 env->current_tb = NULL;
1259 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1260 cpu_resume_from_signal(env, puc);
1262 #endif
1264 #endif
1266 /* add the tb in the target page and protect it if necessary */
1267 static inline void tb_alloc_page(TranslationBlock *tb,
1268 unsigned int n, tb_page_addr_t page_addr)
1270 PageDesc *p;
1271 #ifndef CONFIG_USER_ONLY
1272 bool page_already_protected;
1273 #endif
1275 tb->page_addr[n] = page_addr;
1276 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1277 tb->page_next[n] = p->first_tb;
1278 #ifndef CONFIG_USER_ONLY
1279 page_already_protected = p->first_tb != NULL;
1280 #endif
1281 p->first_tb = (TranslationBlock *)((long)tb | n);
1282 invalidate_page_bitmap(p);
1284 #if defined(TARGET_HAS_SMC) || 1
1286 #if defined(CONFIG_USER_ONLY)
1287 if (p->flags & PAGE_WRITE) {
1288 target_ulong addr;
1289 PageDesc *p2;
1290 int prot;
1292         /* make the host page non-writable (writes will take a
1293            page fault + mprotect overhead) */
1294 page_addr &= qemu_host_page_mask;
1295 prot = 0;
1296 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1297 addr += TARGET_PAGE_SIZE) {
1299 p2 = page_find (addr >> TARGET_PAGE_BITS);
1300 if (!p2)
1301 continue;
1302 prot |= p2->flags;
1303 p2->flags &= ~PAGE_WRITE;
1305 mprotect(g2h(page_addr), qemu_host_page_size,
1306 (prot & PAGE_BITS) & ~PAGE_WRITE);
1307 #ifdef DEBUG_TB_INVALIDATE
1308 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1309 page_addr);
1310 #endif
1312 #else
1313 /* if some code is already present, then the pages are already
1314 protected. So we handle the case where only the first TB is
1315 allocated in a physical page */
1316 if (!page_already_protected) {
1317 tlb_protect_code(page_addr);
1319 #endif
1321 #endif /* TARGET_HAS_SMC */
1324 /* add a new TB and link it to the physical page tables. phys_page2 is
1325 (-1) to indicate that only one page contains the TB. */
1326 void tb_link_page(TranslationBlock *tb,
1327 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1329 unsigned int h;
1330 TranslationBlock **ptb;
1332 /* Grab the mmap lock to stop another thread invalidating this TB
1333 before we are done. */
1334 mmap_lock();
1335 /* add in the physical hash table */
1336 h = tb_phys_hash_func(phys_pc);
1337 ptb = &tb_phys_hash[h];
1338 tb->phys_hash_next = *ptb;
1339 *ptb = tb;
1341 /* add in the page list */
1342 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1343 if (phys_page2 != -1)
1344 tb_alloc_page(tb, 1, phys_page2);
1345 else
1346 tb->page_addr[1] = -1;
1348 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1349 tb->jmp_next[0] = NULL;
1350 tb->jmp_next[1] = NULL;
1352 /* init original jump addresses */
1353 if (tb->tb_next_offset[0] != 0xffff)
1354 tb_reset_jump(tb, 0);
1355 if (tb->tb_next_offset[1] != 0xffff)
1356 tb_reset_jump(tb, 1);
1358 #ifdef DEBUG_TB_CHECK
1359 tb_page_check();
1360 #endif
1361 mmap_unlock();
1364 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1365 tb[1].tc_ptr. Return NULL if not found */
1366 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1368 int m_min, m_max, m;
1369 unsigned long v;
1370 TranslationBlock *tb;
1372 if (nb_tbs <= 0)
1373 return NULL;
1374 if (tc_ptr < (unsigned long)code_gen_buffer ||
1375 tc_ptr >= (unsigned long)code_gen_ptr)
1376 return NULL;
1377 /* binary search (cf Knuth) */
1378 m_min = 0;
1379 m_max = nb_tbs - 1;
1380 while (m_min <= m_max) {
1381 m = (m_min + m_max) >> 1;
1382 tb = &tbs[m];
1383 v = (unsigned long)tb->tc_ptr;
1384 if (v == tc_ptr)
1385 return tb;
1386 else if (tc_ptr < v) {
1387 m_max = m - 1;
1388 } else {
1389 m_min = m + 1;
1392 return &tbs[m_max];
1395 static void tb_reset_jump_recursive(TranslationBlock *tb);
1397 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1399 TranslationBlock *tb1, *tb_next, **ptb;
1400 unsigned int n1;
1402 tb1 = tb->jmp_next[n];
1403 if (tb1 != NULL) {
1404 /* find head of list */
1405 for(;;) {
1406 n1 = (long)tb1 & 3;
1407 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1408 if (n1 == 2)
1409 break;
1410 tb1 = tb1->jmp_next[n1];
1412         /* we are now sure that tb jumps to tb1 */
1413 tb_next = tb1;
1415 /* remove tb from the jmp_first list */
1416 ptb = &tb_next->jmp_first;
1417 for(;;) {
1418 tb1 = *ptb;
1419 n1 = (long)tb1 & 3;
1420 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1421 if (n1 == n && tb1 == tb)
1422 break;
1423 ptb = &tb1->jmp_next[n1];
1425 *ptb = tb->jmp_next[n];
1426 tb->jmp_next[n] = NULL;
1428 /* suppress the jump to next tb in generated code */
1429 tb_reset_jump(tb, n);
1431         /* remove the jumps in the tb we could have jumped to */
1432 tb_reset_jump_recursive(tb_next);
1436 static void tb_reset_jump_recursive(TranslationBlock *tb)
1438 tb_reset_jump_recursive2(tb, 0);
1439 tb_reset_jump_recursive2(tb, 1);
1442 #if defined(TARGET_HAS_ICE)
1443 #if defined(CONFIG_USER_ONLY)
1444 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1446 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1448 #else
1449 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1451 target_phys_addr_t addr;
1452 target_ulong pd;
1453 ram_addr_t ram_addr;
1454 PhysPageDesc p;
1456 addr = cpu_get_phys_page_debug(env, pc);
1457 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1458 pd = p.phys_offset;
1459 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1460 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1462 #endif
1463 #endif /* TARGET_HAS_ICE */
1465 #if defined(CONFIG_USER_ONLY)
1466 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1471 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1472 int flags, CPUWatchpoint **watchpoint)
1474 return -ENOSYS;
1476 #else
1477 /* Add a watchpoint. */
1478 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1479 int flags, CPUWatchpoint **watchpoint)
1481 target_ulong len_mask = ~(len - 1);
1482 CPUWatchpoint *wp;
1484 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1485 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1486 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1487 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1488 return -EINVAL;
1490 wp = g_malloc(sizeof(*wp));
1492 wp->vaddr = addr;
1493 wp->len_mask = len_mask;
1494 wp->flags = flags;
1496 /* keep all GDB-injected watchpoints in front */
1497 if (flags & BP_GDB)
1498 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1499 else
1500 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1502 tlb_flush_page(env, addr);
1504 if (watchpoint)
1505 *watchpoint = wp;
1506 return 0;
1509 /* Remove a specific watchpoint. */
1510 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1511 int flags)
1513 target_ulong len_mask = ~(len - 1);
1514 CPUWatchpoint *wp;
1516 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1517 if (addr == wp->vaddr && len_mask == wp->len_mask
1518 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1519 cpu_watchpoint_remove_by_ref(env, wp);
1520 return 0;
1523 return -ENOENT;
1526 /* Remove a specific watchpoint by reference. */
1527 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1529 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1531 tlb_flush_page(env, watchpoint->vaddr);
1533 g_free(watchpoint);
1536 /* Remove all matching watchpoints. */
1537 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1539 CPUWatchpoint *wp, *next;
1541 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1542 if (wp->flags & mask)
1543 cpu_watchpoint_remove_by_ref(env, wp);
1546 #endif
1548 /* Add a breakpoint. */
1549 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1550 CPUBreakpoint **breakpoint)
1552 #if defined(TARGET_HAS_ICE)
1553 CPUBreakpoint *bp;
1555 bp = g_malloc(sizeof(*bp));
1557 bp->pc = pc;
1558 bp->flags = flags;
1560 /* keep all GDB-injected breakpoints in front */
1561 if (flags & BP_GDB)
1562 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1563 else
1564 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1566 breakpoint_invalidate(env, pc);
1568 if (breakpoint)
1569 *breakpoint = bp;
1570 return 0;
1571 #else
1572 return -ENOSYS;
1573 #endif
1576 /* Remove a specific breakpoint. */
1577 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1579 #if defined(TARGET_HAS_ICE)
1580 CPUBreakpoint *bp;
1582 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1583 if (bp->pc == pc && bp->flags == flags) {
1584 cpu_breakpoint_remove_by_ref(env, bp);
1585 return 0;
1588 return -ENOENT;
1589 #else
1590 return -ENOSYS;
1591 #endif
1594 /* Remove a specific breakpoint by reference. */
1595 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1597 #if defined(TARGET_HAS_ICE)
1598 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1600 breakpoint_invalidate(env, breakpoint->pc);
1602 g_free(breakpoint);
1603 #endif
1606 /* Remove all matching breakpoints. */
1607 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1609 #if defined(TARGET_HAS_ICE)
1610 CPUBreakpoint *bp, *next;
1612 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1613 if (bp->flags & mask)
1614 cpu_breakpoint_remove_by_ref(env, bp);
1616 #endif
1619 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1620 CPU loop after each instruction */
1621 void cpu_single_step(CPUState *env, int enabled)
1623 #if defined(TARGET_HAS_ICE)
1624 if (env->singlestep_enabled != enabled) {
1625 env->singlestep_enabled = enabled;
1626 if (kvm_enabled())
1627 kvm_update_guest_debug(env, 0);
1628 else {
1629 /* must flush all the translated code to avoid inconsistencies */
1630 /* XXX: only flush what is necessary */
1631 tb_flush(env);
1634 #endif
1637 /* enable or disable low-level logging */
1638 void cpu_set_log(int log_flags)
1640 loglevel = log_flags;
1641 if (loglevel && !logfile) {
1642 logfile = fopen(logfilename, log_append ? "a" : "w");
1643 if (!logfile) {
1644 perror(logfilename);
1645 _exit(1);
1647 #if !defined(CONFIG_SOFTMMU)
1648         /* must keep glibc from using mmap() by setting the buffer "by hand" */
1650 static char logfile_buf[4096];
1651 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1653 #elif defined(_WIN32)
1654 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1655 setvbuf(logfile, NULL, _IONBF, 0);
1656 #else
1657 setvbuf(logfile, NULL, _IOLBF, 0);
1658 #endif
1659 log_append = 1;
1661 if (!loglevel && logfile) {
1662 fclose(logfile);
1663 logfile = NULL;
1667 void cpu_set_log_filename(const char *filename)
1669 logfilename = strdup(filename);
1670 if (logfile) {
1671 fclose(logfile);
1672 logfile = NULL;
1674 cpu_set_log(loglevel);
1677 static void cpu_unlink_tb(CPUState *env)
1679 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1680 problem and hope the cpu will stop of its own accord. For userspace
1681 emulation this often isn't actually as bad as it sounds. Often
1682 signals are used primarily to interrupt blocking syscalls. */
1683 TranslationBlock *tb;
1684 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1686 spin_lock(&interrupt_lock);
1687 tb = env->current_tb;
1688 /* if the cpu is currently executing code, we must unlink it and
1689 all the potentially executing TB */
1690 if (tb) {
1691 env->current_tb = NULL;
1692 tb_reset_jump_recursive(tb);
1694 spin_unlock(&interrupt_lock);
1697 #ifndef CONFIG_USER_ONLY
1698 /* mask must never be zero, except for A20 change call */
1699 static void tcg_handle_interrupt(CPUState *env, int mask)
1701 int old_mask;
1703 old_mask = env->interrupt_request;
1704 env->interrupt_request |= mask;
1707 * If called from iothread context, wake the target cpu in
1708      * case it's halted.
1710 if (!qemu_cpu_is_self(env)) {
1711 qemu_cpu_kick(env);
1712 return;
1715 if (use_icount) {
1716 env->icount_decr.u16.high = 0xffff;
1717 if (!can_do_io(env)
1718 && (mask & ~old_mask) != 0) {
1719 cpu_abort(env, "Raised interrupt while not in I/O function");
1721 } else {
1722 cpu_unlink_tb(env);
1726 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1728 #else /* CONFIG_USER_ONLY */
1730 void cpu_interrupt(CPUState *env, int mask)
1732 env->interrupt_request |= mask;
1733 cpu_unlink_tb(env);
1735 #endif /* CONFIG_USER_ONLY */
1737 void cpu_reset_interrupt(CPUState *env, int mask)
1739 env->interrupt_request &= ~mask;
1742 void cpu_exit(CPUState *env)
1744 env->exit_request = 1;
1745 cpu_unlink_tb(env);
1748 const CPULogItem cpu_log_items[] = {
1749 { CPU_LOG_TB_OUT_ASM, "out_asm",
1750 "show generated host assembly code for each compiled TB" },
1751 { CPU_LOG_TB_IN_ASM, "in_asm",
1752 "show target assembly code for each compiled TB" },
1753 { CPU_LOG_TB_OP, "op",
1754 "show micro ops for each compiled TB" },
1755 { CPU_LOG_TB_OP_OPT, "op_opt",
1756 "show micro ops "
1757 #ifdef TARGET_I386
1758 "before eflags optimization and "
1759 #endif
1760 "after liveness analysis" },
1761 { CPU_LOG_INT, "int",
1762 "show interrupts/exceptions in short format" },
1763 { CPU_LOG_EXEC, "exec",
1764 "show trace before each executed TB (lots of logs)" },
1765 { CPU_LOG_TB_CPU, "cpu",
1766 "show CPU state before block translation" },
1767 #ifdef TARGET_I386
1768 { CPU_LOG_PCALL, "pcall",
1769 "show protected mode far calls/returns/exceptions" },
1770 { CPU_LOG_RESET, "cpu_reset",
1771 "show CPU state before CPU resets" },
1772 #endif
1773 #ifdef DEBUG_IOPORT
1774 { CPU_LOG_IOPORT, "ioport",
1775 "show all i/o ports accesses" },
1776 #endif
1777 { 0, NULL, NULL },
1780 static int cmp1(const char *s1, int n, const char *s2)
1782 if (strlen(s2) != n)
1783 return 0;
1784 return memcmp(s1, s2, n) == 0;
1787 /* takes a comma-separated list of log masks. Returns 0 on error. */
1788 int cpu_str_to_log_mask(const char *str)
1790 const CPULogItem *item;
1791 int mask;
1792 const char *p, *p1;
1794 p = str;
1795 mask = 0;
1796 for(;;) {
1797 p1 = strchr(p, ',');
1798 if (!p1)
1799 p1 = p + strlen(p);
1800 if(cmp1(p,p1-p,"all")) {
1801 for(item = cpu_log_items; item->mask != 0; item++) {
1802 mask |= item->mask;
1804 } else {
1805 for(item = cpu_log_items; item->mask != 0; item++) {
1806 if (cmp1(p, p1 - p, item->name))
1807 goto found;
1809 return 0;
1811 found:
1812 mask |= item->mask;
1813 if (*p1 != ',')
1814 break;
1815 p = p1 + 1;
1817 return mask;
1820 void cpu_abort(CPUState *env, const char *fmt, ...)
1822 va_list ap;
1823 va_list ap2;
1825 va_start(ap, fmt);
1826 va_copy(ap2, ap);
1827 fprintf(stderr, "qemu: fatal: ");
1828 vfprintf(stderr, fmt, ap);
1829 fprintf(stderr, "\n");
1830 #ifdef TARGET_I386
1831 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1832 #else
1833 cpu_dump_state(env, stderr, fprintf, 0);
1834 #endif
1835 if (qemu_log_enabled()) {
1836 qemu_log("qemu: fatal: ");
1837 qemu_log_vprintf(fmt, ap2);
1838 qemu_log("\n");
1839 #ifdef TARGET_I386
1840 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1841 #else
1842 log_cpu_state(env, 0);
1843 #endif
1844 qemu_log_flush();
1845 qemu_log_close();
1847 va_end(ap2);
1848 va_end(ap);
1849 #if defined(CONFIG_USER_ONLY)
1851 struct sigaction act;
1852 sigfillset(&act.sa_mask);
1853 act.sa_handler = SIG_DFL;
1854 sigaction(SIGABRT, &act, NULL);
1856 #endif
1857 abort();
1860 CPUState *cpu_copy(CPUState *env)
1862 CPUState *new_env = cpu_init(env->cpu_model_str);
1863 CPUState *next_cpu = new_env->next_cpu;
1864 int cpu_index = new_env->cpu_index;
1865 #if defined(TARGET_HAS_ICE)
1866 CPUBreakpoint *bp;
1867 CPUWatchpoint *wp;
1868 #endif
1870 memcpy(new_env, env, sizeof(CPUState));
1872 /* Preserve chaining and index. */
1873 new_env->next_cpu = next_cpu;
1874 new_env->cpu_index = cpu_index;
1876 /* Clone all break/watchpoints.
1877 Note: Once we support ptrace with hw-debug register access, make sure
1878 BP_CPU break/watchpoints are handled correctly on clone. */
1879 QTAILQ_INIT(&env->breakpoints);
1880 QTAILQ_INIT(&env->watchpoints);
1881 #if defined(TARGET_HAS_ICE)
1882 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1883 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1885 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1886 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1887 wp->flags, NULL);
1889 #endif
1891 return new_env;
1894 #if !defined(CONFIG_USER_ONLY)
1896 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1898 unsigned int i;
1900 /* Discard jump cache entries for any tb which might potentially
1901 overlap the flushed page. */
1902 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1903 memset (&env->tb_jmp_cache[i], 0,
1904 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1906 i = tb_jmp_cache_hash_page(addr);
1907 memset (&env->tb_jmp_cache[i], 0,
1908 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1911 static CPUTLBEntry s_cputlb_empty_entry = {
1912 .addr_read = -1,
1913 .addr_write = -1,
1914 .addr_code = -1,
1915 .addend = -1,
1918 /* NOTE:
1919 * If flush_global is true (the usual case), flush all tlb entries.
1920 * If flush_global is false, flush (at least) all tlb entries not
1921 * marked global.
1923 * Since QEMU doesn't currently implement a global/not-global flag
1924 * for tlb entries, at the moment tlb_flush() will also flush all
1925 * tlb entries in the flush_global == false case. This is OK because
1926 * CPU architectures generally permit an implementation to drop
1927 * entries from the TLB at any time, so flushing more entries than
1928 * required is only an efficiency issue, not a correctness issue.
1930 void tlb_flush(CPUState *env, int flush_global)
1932 int i;
1934 #if defined(DEBUG_TLB)
1935 printf("tlb_flush:\n");
1936 #endif
1937 /* must reset current TB so that interrupts cannot modify the
1938 links while we are modifying them */
1939 env->current_tb = NULL;
1941 for(i = 0; i < CPU_TLB_SIZE; i++) {
1942 int mmu_idx;
1943 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1944 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1948 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1950 env->tlb_flush_addr = -1;
1951 env->tlb_flush_mask = 0;
1952 tlb_flush_count++;
1955 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1957 if (addr == (tlb_entry->addr_read &
1958 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1959 addr == (tlb_entry->addr_write &
1960 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1961 addr == (tlb_entry->addr_code &
1962 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1963 *tlb_entry = s_cputlb_empty_entry;
1967 void tlb_flush_page(CPUState *env, target_ulong addr)
1969 int i;
1970 int mmu_idx;
1972 #if defined(DEBUG_TLB)
1973 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1974 #endif
1975 /* Check if we need to flush due to large pages. */
1976 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1977 #if defined(DEBUG_TLB)
1978 printf("tlb_flush_page: forced full flush ("
1979 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
1980 env->tlb_flush_addr, env->tlb_flush_mask);
1981 #endif
1982 tlb_flush(env, 1);
1983 return;
1985 /* must reset current TB so that interrupts cannot modify the
1986 links while we are modifying them */
1987 env->current_tb = NULL;
1989 addr &= TARGET_PAGE_MASK;
1990 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
1991 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
1992 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
1994 tlb_flush_jmp_cache(env, addr);
1997 /* update the TLBs so that writes to code in the virtual page 'addr'
1998 can be detected */
1999 static void tlb_protect_code(ram_addr_t ram_addr)
2001 cpu_physical_memory_reset_dirty(ram_addr,
2002 ram_addr + TARGET_PAGE_SIZE,
2003 CODE_DIRTY_FLAG);
2006 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2007 tested for self modifying code */
2008 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2009 target_ulong vaddr)
2011 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2014 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2015 unsigned long start, unsigned long length)
2017 unsigned long addr;
2018 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2019 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2020 if ((addr - start) < length) {
2021 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2026 /* Note: start and end must be within the same ram block. */
2027 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2028 int dirty_flags)
2030 CPUState *env;
2031 unsigned long length, start1;
2032 int i;
2034 start &= TARGET_PAGE_MASK;
2035 end = TARGET_PAGE_ALIGN(end);
2037 length = end - start;
2038 if (length == 0)
2039 return;
2040 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2042 /* we modify the TLB cache so that the dirty bit will be set again
2043 when accessing the range */
2044 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2045 /* Check that we don't span multiple blocks - this breaks the
2046 address comparisons below. */
2047 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2048 != (end - 1) - start) {
2049 abort();
2052 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2053 int mmu_idx;
2054 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2055 for(i = 0; i < CPU_TLB_SIZE; i++)
2056 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2057 start1, length);
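/* Once the dirty bits are cleared, the matching TLB entries are tagged
   TLB_NOTDIRTY above, so the next guest write to one of these pages is
   forced through the slow path (io_mem_notdirty), which sets the dirty
   flags again before restoring fast RAM access.  Migration and the
   display code rely on this to see which pages have been touched. */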
2062 int cpu_physical_memory_set_dirty_tracking(int enable)
2064 int ret = 0;
2065 in_migration = enable;
2066 return ret;
2069 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2071 ram_addr_t ram_addr;
2072 void *p;
2074 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2075 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2076 + tlb_entry->addend);
2077 ram_addr = qemu_ram_addr_from_host_nofail(p);
2078 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2079 tlb_entry->addr_write |= TLB_NOTDIRTY;
2084 /* update the TLB according to the current state of the dirty bits */
2085 void cpu_tlb_update_dirty(CPUState *env)
2087 int i;
2088 int mmu_idx;
2089 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2090 for(i = 0; i < CPU_TLB_SIZE; i++)
2091 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2095 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2097 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2098 tlb_entry->addr_write = vaddr;
2101 /* update the TLB corresponding to virtual page vaddr
2102 so that it is no longer dirty */
2103 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2105 int i;
2106 int mmu_idx;
2108 vaddr &= TARGET_PAGE_MASK;
2109 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2110 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2111 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2114 /* Our TLB does not support large pages, so remember the area covered by
2115 large pages and trigger a full TLB flush if these are invalidated. */
2116 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2117 target_ulong size)
2119 target_ulong mask = ~(size - 1);
2121 if (env->tlb_flush_addr == (target_ulong)-1) {
2122 env->tlb_flush_addr = vaddr & mask;
2123 env->tlb_flush_mask = mask;
2124 return;
2126 /* Extend the existing region to include the new page.
2127 This is a compromise between unnecessary flushes and the cost
2128 of maintaining a full variable size TLB. */
2129 mask &= env->tlb_flush_mask;
2130 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2131 mask <<= 1;
2133 env->tlb_flush_addr &= mask;
2134 env->tlb_flush_mask = mask;
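/* Worked example (illustrative only): with 2 MB large pages, a first call
   for vaddr 0x00200000 records tlb_flush_addr = 0x00200000 and
   tlb_flush_mask = 0xffe00000.  A later call for vaddr 0x00600000 shifts
   the mask left until both addresses agree under it, ending with
   tlb_flush_addr = 0x00000000 and tlb_flush_mask = 0xff800000, i.e. one
   8 MB region covering both pages.  tlb_flush_page() then forces a full
   flush for any address that falls inside that region. */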
2137 static bool is_ram_rom(ram_addr_t pd)
2139 pd &= ~TARGET_PAGE_MASK;
2140 return pd == io_mem_ram.ram_addr || pd == io_mem_rom.ram_addr;
2143 static bool is_romd(ram_addr_t pd)
2145 MemoryRegion *mr;
2147 pd &= ~TARGET_PAGE_MASK;
2148 mr = io_mem_region[pd];
2149 return mr->rom_device && mr->readable;
2152 static bool is_ram_rom_romd(ram_addr_t pd)
2154 return is_ram_rom(pd) || is_romd(pd);
2157 /* Add a new TLB entry. At most one entry for a given virtual address
2158 is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2159 supplied size is used only by tlb_flush_page. */
2160 void tlb_set_page(CPUState *env, target_ulong vaddr,
2161 target_phys_addr_t paddr, int prot,
2162 int mmu_idx, target_ulong size)
2164 PhysPageDesc p;
2165 unsigned long pd;
2166 unsigned int index;
2167 target_ulong address;
2168 target_ulong code_address;
2169 unsigned long addend;
2170 CPUTLBEntry *te;
2171 CPUWatchpoint *wp;
2172 target_phys_addr_t iotlb;
2174 assert(size >= TARGET_PAGE_SIZE);
2175 if (size != TARGET_PAGE_SIZE) {
2176 tlb_add_large_page(env, vaddr, size);
2178 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2179 pd = p.phys_offset;
2180 #if defined(DEBUG_TLB)
2181 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2182 " prot=%x idx=%d pd=0x%08lx\n",
2183 vaddr, paddr, prot, mmu_idx, pd);
2184 #endif
2186 address = vaddr;
2187 if (!is_ram_rom_romd(pd)) {
2188 /* IO memory case (romd handled later) */
2189 address |= TLB_MMIO;
2191 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2192 if (is_ram_rom(pd)) {
2193 /* Normal RAM. */
2194 iotlb = pd & TARGET_PAGE_MASK;
2195 if ((pd & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr)
2196 iotlb |= io_mem_notdirty.ram_addr;
2197 else
2198 iotlb |= io_mem_rom.ram_addr;
2199 } else {
2200 /* IO handlers are currently passed a physical address.
2201 It would be nice to pass an offset from the base address
2202 of that region. This would avoid having to special case RAM,
2203 and avoid full address decoding in every device.
2204 We can't use the high bits of pd for this because
2205 IO_MEM_ROMD uses these as a ram address. */
2206 iotlb = (pd & ~TARGET_PAGE_MASK);
2207 iotlb += p.region_offset;
2210 code_address = address;
2211 /* Make accesses to pages with watchpoints go via the
2212 watchpoint trap routines. */
2213 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2214 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2215 /* Avoid trapping reads of pages with a write breakpoint. */
2216 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2217 iotlb = io_mem_watch.ram_addr + paddr;
2218 address |= TLB_MMIO;
2219 break;
2224 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2225 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2226 te = &env->tlb_table[mmu_idx][index];
2227 te->addend = addend - vaddr;
2228 if (prot & PAGE_READ) {
2229 te->addr_read = address;
2230 } else {
2231 te->addr_read = -1;
2234 if (prot & PAGE_EXEC) {
2235 te->addr_code = code_address;
2236 } else {
2237 te->addr_code = -1;
2239 if (prot & PAGE_WRITE) {
2240 if ((pd & ~TARGET_PAGE_MASK) == io_mem_rom.ram_addr || is_romd(pd)) {
2241 /* Write access calls the I/O callback. */
2242 te->addr_write = address | TLB_MMIO;
2243 } else if ((pd & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr &&
2244 !cpu_physical_memory_is_dirty(pd)) {
2245 te->addr_write = address | TLB_NOTDIRTY;
2246 } else {
2247 te->addr_write = address;
2249 } else {
2250 te->addr_write = -1;
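/* Illustrative sketch (not part of this file): a target's MMU fault path
   typically calls tlb_set_page() once the guest page tables have been
   walked.  The function and variable names below are invented. */
#if 0
static void example_install_mapping(CPUState *env, target_ulong vaddr,
                                    target_phys_addr_t paddr, int mmu_idx)
{
    /* the protection bits would come from the guest page table walk;
       assume a plain read/write data page here */
    int prot = PAGE_READ | PAGE_WRITE;

    tlb_set_page(env, vaddr & TARGET_PAGE_MASK, paddr & TARGET_PAGE_MASK,
                 prot, mmu_idx, TARGET_PAGE_SIZE);
}
#endif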
2254 #else
2256 void tlb_flush(CPUState *env, int flush_global)
2260 void tlb_flush_page(CPUState *env, target_ulong addr)
2265 * Walks guest process memory "regions" one by one
2266 * and calls callback function 'fn' for each region.
2269 struct walk_memory_regions_data
2271 walk_memory_regions_fn fn;
2272 void *priv;
2273 unsigned long start;
2274 int prot;
2277 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2278 abi_ulong end, int new_prot)
2280 if (data->start != -1ul) {
2281 int rc = data->fn(data->priv, data->start, end, data->prot);
2282 if (rc != 0) {
2283 return rc;
2287 data->start = (new_prot ? end : -1ul);
2288 data->prot = new_prot;
2290 return 0;
2293 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2294 abi_ulong base, int level, void **lp)
2296 abi_ulong pa;
2297 int i, rc;
2299 if (*lp == NULL) {
2300 return walk_memory_regions_end(data, base, 0);
2303 if (level == 0) {
2304 PageDesc *pd = *lp;
2305 for (i = 0; i < L2_SIZE; ++i) {
2306 int prot = pd[i].flags;
2308 pa = base | (i << TARGET_PAGE_BITS);
2309 if (prot != data->prot) {
2310 rc = walk_memory_regions_end(data, pa, prot);
2311 if (rc != 0) {
2312 return rc;
2316 } else {
2317 void **pp = *lp;
2318 for (i = 0; i < L2_SIZE; ++i) {
2319 pa = base | ((abi_ulong)i <<
2320 (TARGET_PAGE_BITS + L2_BITS * level));
2321 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2322 if (rc != 0) {
2323 return rc;
2328 return 0;
2331 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2333 struct walk_memory_regions_data data;
2334 unsigned long i;
2336 data.fn = fn;
2337 data.priv = priv;
2338 data.start = -1ul;
2339 data.prot = 0;
2341 for (i = 0; i < V_L1_SIZE; i++) {
2342 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2343 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2344 if (rc != 0) {
2345 return rc;
2349 return walk_memory_regions_end(&data, 0, 0);
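/* Illustrative sketch (hypothetical callback, not part of the tree):
   besides dump_region() below, any function matching
   walk_memory_regions_fn can be passed in, e.g. to total up the bytes
   currently mapped executable: */
#if 0
static int example_count_exec(void *priv, abi_ulong start, abi_ulong end,
                              unsigned long prot)
{
    abi_ulong *total = priv;

    if (prot & PAGE_EXEC) {
        *total += end - start;
    }
    return 0; /* returning non-zero would stop the walk early */
}

/* usage: abi_ulong total = 0; walk_memory_regions(&total, example_count_exec); */
#endif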
2352 static int dump_region(void *priv, abi_ulong start,
2353 abi_ulong end, unsigned long prot)
2355 FILE *f = (FILE *)priv;
2357 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2358 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2359 start, end, end - start,
2360 ((prot & PAGE_READ) ? 'r' : '-'),
2361 ((prot & PAGE_WRITE) ? 'w' : '-'),
2362 ((prot & PAGE_EXEC) ? 'x' : '-'));
2364 return (0);
2367 /* dump memory mappings */
2368 void page_dump(FILE *f)
2370 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2371 "start", "end", "size", "prot");
2372 walk_memory_regions(f, dump_region);
2375 int page_get_flags(target_ulong address)
2377 PageDesc *p;
2379 p = page_find(address >> TARGET_PAGE_BITS);
2380 if (!p)
2381 return 0;
2382 return p->flags;
2385 /* Modify the flags of a page and invalidate the code if necessary.
2386 The flag PAGE_WRITE_ORG is positioned automatically depending
2387 on PAGE_WRITE. The mmap_lock should already be held. */
2388 void page_set_flags(target_ulong start, target_ulong end, int flags)
2390 target_ulong addr, len;
2392 /* This function should never be called with addresses outside the
2393 guest address space. If this assert fires, it probably indicates
2394 a missing call to h2g_valid. */
2395 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2396 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2397 #endif
2398 assert(start < end);
2400 start = start & TARGET_PAGE_MASK;
2401 end = TARGET_PAGE_ALIGN(end);
2403 if (flags & PAGE_WRITE) {
2404 flags |= PAGE_WRITE_ORG;
2407 for (addr = start, len = end - start;
2408 len != 0;
2409 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2410 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2412 /* If the write protection bit is set, then we invalidate
2413 the code inside. */
2414 if (!(p->flags & PAGE_WRITE) &&
2415 (flags & PAGE_WRITE) &&
2416 p->first_tb) {
2417 tb_invalidate_phys_page(addr, 0, NULL);
2419 p->flags = flags;
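/* Illustrative sketch (hypothetical caller): the user-mode mmap/mprotect
   emulation is the typical user of page_set_flags(); after a guest mapping
   succeeds it records the new protection roughly like this: */
#if 0
static void example_note_guest_mapping(abi_ulong start, abi_ulong len,
                                       int guest_prot)
{
    mmap_lock();
    /* PAGE_WRITE_ORG is derived automatically from PAGE_WRITE */
    page_set_flags(start, start + len, guest_prot | PAGE_VALID);
    mmap_unlock();
}
#endif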
2423 int page_check_range(target_ulong start, target_ulong len, int flags)
2425 PageDesc *p;
2426 target_ulong end;
2427 target_ulong addr;
2429 /* This function should never be called with addresses outside the
2430 guest address space. If this assert fires, it probably indicates
2431 a missing call to h2g_valid. */
2432 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2433 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2434 #endif
2436 if (len == 0) {
2437 return 0;
2439 if (start + len - 1 < start) {
2440 /* We've wrapped around. */
2441 return -1;
2444 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2445 start = start & TARGET_PAGE_MASK;
2447 for (addr = start, len = end - start;
2448 len != 0;
2449 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2450 p = page_find(addr >> TARGET_PAGE_BITS);
2451 if (!p)
2452 return -1;
2453 if (!(p->flags & PAGE_VALID))
2454 return -1;
2456 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2457 return -1;
2458 if (flags & PAGE_WRITE) {
2459 if (!(p->flags & PAGE_WRITE_ORG))
2460 return -1;
2461 /* unprotect the page if it was put read-only because it
2462 contains translated code */
2463 if (!(p->flags & PAGE_WRITE)) {
2464 if (!page_unprotect(addr, 0, NULL))
2465 return -1;
2467 return 0;
2470 return 0;
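/* Illustrative sketch (hypothetical helper): syscall emulation can use
   page_check_range() to validate a guest buffer before touching it: */
#if 0
static int example_guest_buffer_writable(abi_ulong guest_addr, abi_ulong size)
{
    /* 0 if the access looks permitted, -1 otherwise */
    return page_check_range(guest_addr, size, PAGE_WRITE);
}
#endif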
2473 /* called from signal handler: invalidate the code and unprotect the
2474 page. Return TRUE if the fault was successfully handled. */
2475 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2477 unsigned int prot;
2478 PageDesc *p;
2479 target_ulong host_start, host_end, addr;
2481 /* Technically this isn't safe inside a signal handler. However we
2482 know this only ever happens in a synchronous SEGV handler, so in
2483 practice it seems to be ok. */
2484 mmap_lock();
2486 p = page_find(address >> TARGET_PAGE_BITS);
2487 if (!p) {
2488 mmap_unlock();
2489 return 0;
2492 /* if the page was really writable, then we change its
2493 protection back to writable */
2494 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2495 host_start = address & qemu_host_page_mask;
2496 host_end = host_start + qemu_host_page_size;
2498 prot = 0;
2499 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2500 p = page_find(addr >> TARGET_PAGE_BITS);
2501 p->flags |= PAGE_WRITE;
2502 prot |= p->flags;
2504 /* and since the content will be modified, we must invalidate
2505 the corresponding translated code. */
2506 tb_invalidate_phys_page(addr, pc, puc);
2507 #ifdef DEBUG_TB_CHECK
2508 tb_invalidate_check(addr);
2509 #endif
2511 mprotect((void *)g2h(host_start), qemu_host_page_size,
2512 prot & PAGE_BITS);
2514 mmap_unlock();
2515 return 1;
2517 mmap_unlock();
2518 return 0;
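/* Illustrative sketch: the host SEGV handler in the user-mode emulation
   code is the expected caller; after confirming a write fault on a valid
   guest address it does roughly the following (variable names assumed): */
#if 0
    if (is_write && h2g_valid(host_fault_addr) &&
        page_unprotect(h2g(host_fault_addr), pc, puc)) {
        return 1; /* the fault came from our write protection: retry */
    }
#endif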
2521 static inline void tlb_set_dirty(CPUState *env,
2522 unsigned long addr, target_ulong vaddr)
2525 #endif /* defined(CONFIG_USER_ONLY) */
2527 #if !defined(CONFIG_USER_ONLY)
2529 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2530 typedef struct subpage_t {
2531 MemoryRegion iomem;
2532 target_phys_addr_t base;
2533 uint16_t sub_section[TARGET_PAGE_SIZE];
2534 } subpage_t;
2536 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2537 uint16_t section);
2538 static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section,
2539 uint16_t orig_section);
2540 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2541 need_subpage) \
2542 do { \
2543 if (addr > start_addr) \
2544 start_addr2 = 0; \
2545 else { \
2546 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2547 if (start_addr2 > 0) \
2548 need_subpage = 1; \
2551 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2552 end_addr2 = TARGET_PAGE_SIZE - 1; \
2553 else { \
2554 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2555 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2556 need_subpage = 1; \
2558 } while (0)
2560 static void destroy_page_desc(uint16_t section_index)
2562 MemoryRegionSection *section = &phys_sections[section_index];
2563 MemoryRegion *mr = section->mr;
2565 if (mr->subpage) {
2566 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2567 memory_region_destroy(&subpage->iomem);
2568 g_free(subpage);
2572 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2574 unsigned i;
2575 PhysPageEntry *p;
2577 if (lp->u.node == PHYS_MAP_NODE_NIL) {
2578 return;
2581 p = phys_map_nodes[lp->u.node];
2582 for (i = 0; i < L2_SIZE; ++i) {
2583 if (level > 0) {
2584 destroy_l2_mapping(&p[i], level - 1);
2585 } else {
2586 destroy_page_desc(p[i].u.leaf);
2589 lp->u.node = PHYS_MAP_NODE_NIL;
2592 static void destroy_all_mappings(void)
2594 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2595 phys_map_nodes_reset();
2598 static uint16_t phys_section_add(MemoryRegionSection *section)
2600 if (phys_sections_nb == phys_sections_nb_alloc) {
2601 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2602 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2603 phys_sections_nb_alloc);
2605 phys_sections[phys_sections_nb] = *section;
2606 return phys_sections_nb++;
2609 static void phys_sections_clear(void)
2611 phys_sections_nb = 0;
2614 /* Register the physical memory described by a MemoryRegionSection.
2615 For RAM, the section size should be a multiple of the target page
2616 size. Sections that only partially cover a target page are routed
2617 through a subpage so that the rest of the page keeps its previous
2618 mapping. The address passed to an I/O access function is the offset
2619 within the page plus the section's offset within its region. */
2622 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2623 bool readonly)
2625 target_phys_addr_t start_addr = section->offset_within_address_space;
2626 ram_addr_t size = section->size;
2627 target_phys_addr_t addr, end_addr;
2628 CPUState *env;
2629 ram_addr_t orig_size = size;
2630 subpage_t *subpage;
2631 uint16_t section_index = phys_section_add(section);
2633 assert(size);
2635 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2636 end_addr = start_addr + (target_phys_addr_t)size;
2638 addr = start_addr;
2639 do {
2640 uint16_t *p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2641 uint16_t orig_memory = *p;
2642 target_phys_addr_t start_addr2, end_addr2;
2643 int need_subpage = 0;
2644 MemoryRegion *mr = phys_sections[orig_memory].mr;
2646 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2647 need_subpage);
2648 if (need_subpage) {
2649 if (!(mr->subpage)) {
2650 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2651 p, orig_memory);
2652 } else {
2653 subpage = container_of(mr, subpage_t, iomem);
2655 subpage_register(subpage, start_addr2, end_addr2,
2656 section_index);
2657 } else {
2658 *p = section_index;
2660 addr += TARGET_PAGE_SIZE;
2661 } while (addr != end_addr);
2663 /* since each CPU stores ram addresses in its TLB cache, we must
2664 reset the modified entries */
2665 /* XXX: slow ! */
2666 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2667 tlb_flush(env, 1);
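/* Worked example (illustrative only): registering a section that starts at
   physical address 0x10000100 with size 0x300 on a 4 KB target page makes
   CHECK_SUBPAGE report need_subpage with start_addr2 = 0x100 and
   end_addr2 = 0x3ff, so the page is converted to a subpage_t and only that
   byte range is switched to the new section; the first 0x100 bytes and
   everything from offset 0x400 up keep their previous mapping.  A section
   covering whole pages takes the plain '*p = section_index' path. */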
2671 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2673 if (kvm_enabled())
2674 kvm_coalesce_mmio_region(addr, size);
2677 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2679 if (kvm_enabled())
2680 kvm_uncoalesce_mmio_region(addr, size);
2683 void qemu_flush_coalesced_mmio_buffer(void)
2685 if (kvm_enabled())
2686 kvm_flush_coalesced_mmio_buffer();
2689 #if defined(__linux__) && !defined(TARGET_S390X)
2691 #include <sys/vfs.h>
2693 #define HUGETLBFS_MAGIC 0x958458f6
2695 static long gethugepagesize(const char *path)
2697 struct statfs fs;
2698 int ret;
2700 do {
2701 ret = statfs(path, &fs);
2702 } while (ret != 0 && errno == EINTR);
2704 if (ret != 0) {
2705 perror(path);
2706 return 0;
2709 if (fs.f_type != HUGETLBFS_MAGIC)
2710 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2712 return fs.f_bsize;
2715 static void *file_ram_alloc(RAMBlock *block,
2716 ram_addr_t memory,
2717 const char *path)
2719 char *filename;
2720 void *area;
2721 int fd;
2722 #ifdef MAP_POPULATE
2723 int flags;
2724 #endif
2725 unsigned long hpagesize;
2727 hpagesize = gethugepagesize(path);
2728 if (!hpagesize) {
2729 return NULL;
2732 if (memory < hpagesize) {
2733 return NULL;
2736 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2737 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2738 return NULL;
2741 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2742 return NULL;
2745 fd = mkstemp(filename);
2746 if (fd < 0) {
2747 perror("unable to create backing store for hugepages");
2748 free(filename);
2749 return NULL;
2751 unlink(filename);
2752 free(filename);
2754 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2757 * ftruncate is not supported by hugetlbfs in older
2758 * hosts, so don't bother bailing out on errors.
2759 * If anything goes wrong with it under other filesystems,
2760 * mmap will fail.
2762 if (ftruncate(fd, memory))
2763 perror("ftruncate");
2765 #ifdef MAP_POPULATE
2766 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2767 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2768 * to sidestep this quirk.
2770 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2771 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2772 #else
2773 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2774 #endif
2775 if (area == MAP_FAILED) {
2776 perror("file_ram_alloc: can't mmap RAM pages");
2777 close(fd);
2778 return (NULL);
2780 block->fd = fd;
2781 return area;
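/* Usage note (illustrative): this allocator is only reached when the user
   passes -mem-path, e.g.
       qemu-system-x86_64 -m 1024 -mem-path /dev/hugepages
   optionally with -mem-prealloc so the MAP_POPULATE branch above populates
   the pages up front. */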
2783 #endif
2785 static ram_addr_t find_ram_offset(ram_addr_t size)
2787 RAMBlock *block, *next_block;
2788 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2790 if (QLIST_EMPTY(&ram_list.blocks))
2791 return 0;
2793 QLIST_FOREACH(block, &ram_list.blocks, next) {
2794 ram_addr_t end, next = RAM_ADDR_MAX;
2796 end = block->offset + block->length;
2798 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2799 if (next_block->offset >= end) {
2800 next = MIN(next, next_block->offset);
2803 if (next - end >= size && next - end < mingap) {
2804 offset = end;
2805 mingap = next - end;
2809 if (offset == RAM_ADDR_MAX) {
2810 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2811 (uint64_t)size);
2812 abort();
2815 return offset;
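/* Worked example (illustrative only): with existing blocks at
   [0x00000000, 0x00100000) and [0x00300000, 0x00340000), a request for
   0x80000 bytes considers the gaps starting at 0x00100000 and 0x00340000;
   the 2 MB gap at 0x00100000 is the smallest one that still fits, so the
   new block is placed at offset 0x00100000. */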
2818 static ram_addr_t last_ram_offset(void)
2820 RAMBlock *block;
2821 ram_addr_t last = 0;
2823 QLIST_FOREACH(block, &ram_list.blocks, next)
2824 last = MAX(last, block->offset + block->length);
2826 return last;
2829 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2831 RAMBlock *new_block, *block;
2833 new_block = NULL;
2834 QLIST_FOREACH(block, &ram_list.blocks, next) {
2835 if (block->offset == addr) {
2836 new_block = block;
2837 break;
2840 assert(new_block);
2841 assert(!new_block->idstr[0]);
2843 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2844 char *id = dev->parent_bus->info->get_dev_path(dev);
2845 if (id) {
2846 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2847 g_free(id);
2850 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2852 QLIST_FOREACH(block, &ram_list.blocks, next) {
2853 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2854 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2855 new_block->idstr);
2856 abort();
2861 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2862 MemoryRegion *mr)
2864 RAMBlock *new_block;
2866 size = TARGET_PAGE_ALIGN(size);
2867 new_block = g_malloc0(sizeof(*new_block));
2869 new_block->mr = mr;
2870 new_block->offset = find_ram_offset(size);
2871 if (host) {
2872 new_block->host = host;
2873 new_block->flags |= RAM_PREALLOC_MASK;
2874 } else {
2875 if (mem_path) {
2876 #if defined (__linux__) && !defined(TARGET_S390X)
2877 new_block->host = file_ram_alloc(new_block, size, mem_path);
2878 if (!new_block->host) {
2879 new_block->host = qemu_vmalloc(size);
2880 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2882 #else
2883 fprintf(stderr, "-mem-path option unsupported\n");
2884 exit(1);
2885 #endif
2886 } else {
2887 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2888 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2889 a system-defined value, which is at least 256GB. Larger systems
2890 have larger values. We put the guest between the end of the data
2891 segment (system break) and this value. We use 32GB as a base to
2892 leave enough room for the system break to grow. */
2893 new_block->host = mmap((void*)0x800000000, size,
2894 PROT_EXEC|PROT_READ|PROT_WRITE,
2895 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2896 if (new_block->host == MAP_FAILED) {
2897 fprintf(stderr, "Allocating RAM failed\n");
2898 abort();
2900 #else
2901 if (xen_enabled()) {
2902 xen_ram_alloc(new_block->offset, size, mr);
2903 } else {
2904 new_block->host = qemu_vmalloc(size);
2906 #endif
2907 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2910 new_block->length = size;
2912 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2914 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2915 last_ram_offset() >> TARGET_PAGE_BITS);
2916 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2917 0xff, size >> TARGET_PAGE_BITS);
2919 if (kvm_enabled())
2920 kvm_setup_guest_memory(new_block->host, size);
2922 return new_block->offset;
2925 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2927 return qemu_ram_alloc_from_ptr(size, NULL, mr);
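/* Illustrative sketch: memory_region_init_ram()/memory_region_init_ram_ptr()
   in memory.c are the usual callers of these two functions.  A device that
   already owns a host buffer could also register it directly (the names
   below are invented): */
#if 0
    void *host_buf = qemu_vmalloc(0x10000);
    ram_addr_t off = qemu_ram_alloc_from_ptr(0x10000, host_buf, &example_mr);
#endif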
2930 void qemu_ram_free_from_ptr(ram_addr_t addr)
2932 RAMBlock *block;
2934 QLIST_FOREACH(block, &ram_list.blocks, next) {
2935 if (addr == block->offset) {
2936 QLIST_REMOVE(block, next);
2937 g_free(block);
2938 return;
2943 void qemu_ram_free(ram_addr_t addr)
2945 RAMBlock *block;
2947 QLIST_FOREACH(block, &ram_list.blocks, next) {
2948 if (addr == block->offset) {
2949 QLIST_REMOVE(block, next);
2950 if (block->flags & RAM_PREALLOC_MASK) {
2952 } else if (mem_path) {
2953 #if defined (__linux__) && !defined(TARGET_S390X)
2954 if (block->fd) {
2955 munmap(block->host, block->length);
2956 close(block->fd);
2957 } else {
2958 qemu_vfree(block->host);
2960 #else
2961 abort();
2962 #endif
2963 } else {
2964 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2965 munmap(block->host, block->length);
2966 #else
2967 if (xen_enabled()) {
2968 xen_invalidate_map_cache_entry(block->host);
2969 } else {
2970 qemu_vfree(block->host);
2972 #endif
2974 g_free(block);
2975 return;
2981 #ifndef _WIN32
2982 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2984 RAMBlock *block;
2985 ram_addr_t offset;
2986 int flags;
2987 void *area, *vaddr;
2989 QLIST_FOREACH(block, &ram_list.blocks, next) {
2990 offset = addr - block->offset;
2991 if (offset < block->length) {
2992 vaddr = block->host + offset;
2993 if (block->flags & RAM_PREALLOC_MASK) {
2995 } else {
2996 flags = MAP_FIXED;
2997 munmap(vaddr, length);
2998 if (mem_path) {
2999 #if defined(__linux__) && !defined(TARGET_S390X)
3000 if (block->fd) {
3001 #ifdef MAP_POPULATE
3002 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3003 MAP_PRIVATE;
3004 #else
3005 flags |= MAP_PRIVATE;
3006 #endif
3007 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3008 flags, block->fd, offset);
3009 } else {
3010 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3011 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3012 flags, -1, 0);
3014 #else
3015 abort();
3016 #endif
3017 } else {
3018 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3019 flags |= MAP_SHARED | MAP_ANONYMOUS;
3020 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3021 flags, -1, 0);
3022 #else
3023 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3024 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3025 flags, -1, 0);
3026 #endif
3028 if (area != vaddr) {
3029 fprintf(stderr, "Could not remap addr: "
3030 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3031 length, addr);
3032 exit(1);
3034 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3036 return;
3040 #endif /* !_WIN32 */
3042 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3043 With the exception of the softmmu code in this file, this should
3044 only be used for local memory (e.g. video ram) that the device owns,
3045 and knows it isn't going to access beyond the end of the block.
3047 It should not be used for general purpose DMA.
3048 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3050 void *qemu_get_ram_ptr(ram_addr_t addr)
3052 RAMBlock *block;
3054 QLIST_FOREACH(block, &ram_list.blocks, next) {
3055 if (addr - block->offset < block->length) {
3056 /* Move this entry to the start of the list. */
3057 if (block != QLIST_FIRST(&ram_list.blocks)) {
3058 QLIST_REMOVE(block, next);
3059 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3061 if (xen_enabled()) {
3062 /* We need to check if the requested address is in the RAM
3063 * because we don't want to map the entire memory in QEMU.
3064 * In that case just map until the end of the page.
3066 if (block->offset == 0) {
3067 return xen_map_cache(addr, 0, 0);
3068 } else if (block->host == NULL) {
3069 block->host =
3070 xen_map_cache(block->offset, block->length, 1);
3073 return block->host + (addr - block->offset);
3077 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3078 abort();
3080 return NULL;
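/* Illustrative sketch (hypothetical device code), following the rules in
   the comment above: a display device that owns its VRAM block may cache
   the host pointer for local rendering: */
#if 0
    ram_addr_t vram_offset = qemu_ram_alloc(0x00200000, &example_vga_region);
    uint8_t *vram = qemu_get_ram_ptr(vram_offset);
    /* ... read/write pixels through 'vram' ... */
    qemu_put_ram_ptr(vram);
#endif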
3083 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3084 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3086 void *qemu_safe_ram_ptr(ram_addr_t addr)
3088 RAMBlock *block;
3090 QLIST_FOREACH(block, &ram_list.blocks, next) {
3091 if (addr - block->offset < block->length) {
3092 if (xen_enabled()) {
3093 /* We need to check if the requested address is in the RAM
3094 * because we don't want to map the entire memory in QEMU.
3095 * In that case just map until the end of the page.
3097 if (block->offset == 0) {
3098 return xen_map_cache(addr, 0, 0);
3099 } else if (block->host == NULL) {
3100 block->host =
3101 xen_map_cache(block->offset, block->length, 1);
3104 return block->host + (addr - block->offset);
3108 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3109 abort();
3111 return NULL;
3114 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3115 * but takes a size argument */
3116 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3118 if (*size == 0) {
3119 return NULL;
3121 if (xen_enabled()) {
3122 return xen_map_cache(addr, *size, 1);
3123 } else {
3124 RAMBlock *block;
3126 QLIST_FOREACH(block, &ram_list.blocks, next) {
3127 if (addr - block->offset < block->length) {
3128 if (addr - block->offset + *size > block->length)
3129 *size = block->length - addr + block->offset;
3130 return block->host + (addr - block->offset);
3134 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3135 abort();
3139 void qemu_put_ram_ptr(void *addr)
3141 trace_qemu_put_ram_ptr(addr);
3144 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3146 RAMBlock *block;
3147 uint8_t *host = ptr;
3149 if (xen_enabled()) {
3150 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3151 return 0;
3154 QLIST_FOREACH(block, &ram_list.blocks, next) {
3155 /* This case can happen when the block is not mapped. */
3156 if (block->host == NULL) {
3157 continue;
3159 if (host - block->host < block->length) {
3160 *ram_addr = block->offset + (host - block->host);
3161 return 0;
3165 return -1;
3168 /* Some of the softmmu routines need to translate from a host pointer
3169 (typically a TLB entry) back to a ram offset. */
3170 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3172 ram_addr_t ram_addr;
3174 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3175 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3176 abort();
3178 return ram_addr;
3181 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3182 unsigned size)
3184 #ifdef DEBUG_UNASSIGNED
3185 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3186 #endif
3187 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3188 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3189 #endif
3190 return 0;
3193 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3194 uint64_t val, unsigned size)
3196 #ifdef DEBUG_UNASSIGNED
3197 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3198 #endif
3199 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3200 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3201 #endif
3204 static const MemoryRegionOps unassigned_mem_ops = {
3205 .read = unassigned_mem_read,
3206 .write = unassigned_mem_write,
3207 .endianness = DEVICE_NATIVE_ENDIAN,
3210 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3211 unsigned size)
3213 abort();
3216 static void error_mem_write(void *opaque, target_phys_addr_t addr,
3217 uint64_t value, unsigned size)
3219 abort();
3222 static const MemoryRegionOps error_mem_ops = {
3223 .read = error_mem_read,
3224 .write = error_mem_write,
3225 .endianness = DEVICE_NATIVE_ENDIAN,
3228 static const MemoryRegionOps rom_mem_ops = {
3229 .read = error_mem_read,
3230 .write = unassigned_mem_write,
3231 .endianness = DEVICE_NATIVE_ENDIAN,
3234 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3235 uint64_t val, unsigned size)
3237 int dirty_flags;
3238 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3239 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3240 #if !defined(CONFIG_USER_ONLY)
3241 tb_invalidate_phys_page_fast(ram_addr, size);
3242 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3243 #endif
3245 switch (size) {
3246 case 1:
3247 stb_p(qemu_get_ram_ptr(ram_addr), val);
3248 break;
3249 case 2:
3250 stw_p(qemu_get_ram_ptr(ram_addr), val);
3251 break;
3252 case 4:
3253 stl_p(qemu_get_ram_ptr(ram_addr), val);
3254 break;
3255 default:
3256 abort();
3258 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3259 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3260 /* we remove the notdirty callback only if the code has been
3261 flushed */
3262 if (dirty_flags == 0xff)
3263 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3266 static const MemoryRegionOps notdirty_mem_ops = {
3267 .read = error_mem_read,
3268 .write = notdirty_mem_write,
3269 .endianness = DEVICE_NATIVE_ENDIAN,
3272 /* Generate a debug exception if a watchpoint has been hit. */
3273 static void check_watchpoint(int offset, int len_mask, int flags)
3275 CPUState *env = cpu_single_env;
3276 target_ulong pc, cs_base;
3277 TranslationBlock *tb;
3278 target_ulong vaddr;
3279 CPUWatchpoint *wp;
3280 int cpu_flags;
3282 if (env->watchpoint_hit) {
3283 /* We re-entered the check after replacing the TB. Now raise
3284 * the debug interrupt so that it will trigger after the
3285 * current instruction. */
3286 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3287 return;
3289 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3290 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3291 if ((vaddr == (wp->vaddr & len_mask) ||
3292 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3293 wp->flags |= BP_WATCHPOINT_HIT;
3294 if (!env->watchpoint_hit) {
3295 env->watchpoint_hit = wp;
3296 tb = tb_find_pc(env->mem_io_pc);
3297 if (!tb) {
3298 cpu_abort(env, "check_watchpoint: could not find TB for "
3299 "pc=%p", (void *)env->mem_io_pc);
3301 cpu_restore_state(tb, env, env->mem_io_pc);
3302 tb_phys_invalidate(tb, -1);
3303 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3304 env->exception_index = EXCP_DEBUG;
3305 } else {
3306 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3307 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3309 cpu_resume_from_signal(env, NULL);
3311 } else {
3312 wp->flags &= ~BP_WATCHPOINT_HIT;
3317 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3318 so these check for a hit then pass through to the normal out-of-line
3319 phys routines. */
3320 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3321 unsigned size)
3323 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3324 switch (size) {
3325 case 1: return ldub_phys(addr);
3326 case 2: return lduw_phys(addr);
3327 case 4: return ldl_phys(addr);
3328 default: abort();
3332 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3333 uint64_t val, unsigned size)
3335 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3336 switch (size) {
3337 case 1: stb_phys(addr, val); break;
3338 case 2: stw_phys(addr, val); break;
3339 case 4: stl_phys(addr, val); break;
3340 default: abort();
3344 static const MemoryRegionOps watch_mem_ops = {
3345 .read = watch_mem_read,
3346 .write = watch_mem_write,
3347 .endianness = DEVICE_NATIVE_ENDIAN,
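/* Illustrative sketch: accesses reach watch_mem_read/watch_mem_write because
   tlb_set_page() routes pages covered by a watchpoint through the I/O slow
   path (TLB_MMIO).  The gdbstub installs such a watchpoint with the
   equivalent of: */
#if 0
    CPUWatchpoint *wp;

    /* trap guest writes to a 4-byte variable at guest_vaddr */
    cpu_watchpoint_insert(env, guest_vaddr, 4, BP_MEM_WRITE | BP_GDB, &wp);
#endif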
3350 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3351 unsigned len)
3353 subpage_t *mmio = opaque;
3354 unsigned int idx = SUBPAGE_IDX(addr);
3355 MemoryRegionSection *section;
3356 #if defined(DEBUG_SUBPAGE)
3357 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3358 mmio, len, addr, idx);
3359 #endif
3361 section = &phys_sections[mmio->sub_section[idx]];
3362 addr += mmio->base;
3363 addr -= section->offset_within_address_space;
3364 addr += section->offset_within_region;
3365 return io_mem_read(section->mr->ram_addr, addr, len);
3368 static void subpage_write(void *opaque, target_phys_addr_t addr,
3369 uint64_t value, unsigned len)
3371 subpage_t *mmio = opaque;
3372 unsigned int idx = SUBPAGE_IDX(addr);
3373 MemoryRegionSection *section;
3374 #if defined(DEBUG_SUBPAGE)
3375 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3376 " idx %d value %"PRIx64"\n",
3377 __func__, mmio, len, addr, idx, value);
3378 #endif
3380 section = &phys_sections[mmio->sub_section[idx]];
3381 addr += mmio->base;
3382 addr -= section->offset_within_address_space;
3383 addr += section->offset_within_region;
3384 io_mem_write(section->mr->ram_addr, addr, value, len);
3387 static const MemoryRegionOps subpage_ops = {
3388 .read = subpage_read,
3389 .write = subpage_write,
3390 .endianness = DEVICE_NATIVE_ENDIAN,
3393 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3394 unsigned size)
3396 ram_addr_t raddr = addr;
3397 void *ptr = qemu_get_ram_ptr(raddr);
3398 switch (size) {
3399 case 1: return ldub_p(ptr);
3400 case 2: return lduw_p(ptr);
3401 case 4: return ldl_p(ptr);
3402 default: abort();
3406 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3407 uint64_t value, unsigned size)
3409 ram_addr_t raddr = addr;
3410 void *ptr = qemu_get_ram_ptr(raddr);
3411 switch (size) {
3412 case 1: return stb_p(ptr, value);
3413 case 2: return stw_p(ptr, value);
3414 case 4: return stl_p(ptr, value);
3415 default: abort();
3419 static const MemoryRegionOps subpage_ram_ops = {
3420 .read = subpage_ram_read,
3421 .write = subpage_ram_write,
3422 .endianness = DEVICE_NATIVE_ENDIAN,
3425 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3426 uint16_t section)
3428 int idx, eidx;
3430 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3431 return -1;
3432 idx = SUBPAGE_IDX(start);
3433 eidx = SUBPAGE_IDX(end);
3434 #if defined(DEBUG_SUBPAGE)
3435 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3436 mmio, start, end, idx, eidx, memory);
3437 #endif
3438 if (memory_region_is_ram(phys_sections[section].mr)) {
3439 MemoryRegionSection new_section = phys_sections[section];
3440 new_section.mr = &io_mem_subpage_ram;
3441 section = phys_section_add(&new_section);
3443 for (; idx <= eidx; idx++) {
3444 mmio->sub_section[idx] = section;
3447 return 0;
3450 static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section_ind,
3451 uint16_t orig_section)
3453 subpage_t *mmio;
3454 MemoryRegionSection section = {
3455 .offset_within_address_space = base,
3456 .size = TARGET_PAGE_SIZE,
3459 mmio = g_malloc0(sizeof(subpage_t));
3461 mmio->base = base;
3462 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3463 "subpage", TARGET_PAGE_SIZE);
3464 mmio->iomem.subpage = true;
3465 section.mr = &mmio->iomem;
3466 #if defined(DEBUG_SUBPAGE)
3467 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3468 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3469 #endif
3470 *section_ind = phys_section_add(&section);
3471 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_section);
3473 return mmio;
3476 static int get_free_io_mem_idx(void)
3478 int i;
3480 for (i = 0; i < IO_MEM_NB_ENTRIES; i++)
3481 if (!io_mem_used[i]) {
3482 io_mem_used[i] = 1;
3483 return i;
3485 fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3486 return -1;
3489 /* Register a MemoryRegion in the io_mem_region table and return its
3490 index. If io_index is non-zero, the corresponding existing slot is
3491 modified; if it is zero, a free slot is allocated. The returned
3492 index can be used with io_mem_read()/io_mem_write(); -1 is returned
3493 on error. */
3496 static int cpu_register_io_memory_fixed(int io_index, MemoryRegion *mr)
3498 if (io_index <= 0) {
3499 io_index = get_free_io_mem_idx();
3500 if (io_index == -1)
3501 return io_index;
3502 } else {
3503 if (io_index >= IO_MEM_NB_ENTRIES)
3504 return -1;
3507 io_mem_region[io_index] = mr;
3509 return io_index;
3512 int cpu_register_io_memory(MemoryRegion *mr)
3514 return cpu_register_io_memory_fixed(0, mr);
3517 void cpu_unregister_io_memory(int io_index)
3519 io_mem_region[io_index] = NULL;
3520 io_mem_used[io_index] = 0;
3523 static uint16_t dummy_section(MemoryRegion *mr)
3525 MemoryRegionSection section = {
3526 .mr = mr,
3527 .offset_within_address_space = 0,
3528 .offset_within_region = 0,
3529 .size = UINT64_MAX,
3532 return phys_section_add(&section);
3535 static void io_mem_init(void)
3537 int i;
3539 /* Must be first: */
3540 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3541 assert(io_mem_ram.ram_addr == 0);
3542 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3543 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3544 "unassigned", UINT64_MAX);
3545 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3546 "notdirty", UINT64_MAX);
3547 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3548 "subpage-ram", UINT64_MAX);
3549 for (i = 0; i < 5; i++) /* the five fixed regions registered above */
3550 io_mem_used[i] = 1;
3552 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3553 "watch", UINT64_MAX);
3556 static void core_begin(MemoryListener *listener)
3558 destroy_all_mappings();
3559 phys_sections_clear();
3560 phys_map.u.node = PHYS_MAP_NODE_NIL;
3561 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3564 static void core_commit(MemoryListener *listener)
3568 static void core_region_add(MemoryListener *listener,
3569 MemoryRegionSection *section)
3571 cpu_register_physical_memory_log(section, section->readonly);
3574 static void core_region_del(MemoryListener *listener,
3575 MemoryRegionSection *section)
3579 static void core_region_nop(MemoryListener *listener,
3580 MemoryRegionSection *section)
3582 cpu_register_physical_memory_log(section, section->readonly);
3585 static void core_log_start(MemoryListener *listener,
3586 MemoryRegionSection *section)
3590 static void core_log_stop(MemoryListener *listener,
3591 MemoryRegionSection *section)
3595 static void core_log_sync(MemoryListener *listener,
3596 MemoryRegionSection *section)
3600 static void core_log_global_start(MemoryListener *listener)
3602 cpu_physical_memory_set_dirty_tracking(1);
3605 static void core_log_global_stop(MemoryListener *listener)
3607 cpu_physical_memory_set_dirty_tracking(0);
3610 static void core_eventfd_add(MemoryListener *listener,
3611 MemoryRegionSection *section,
3612 bool match_data, uint64_t data, int fd)
3616 static void core_eventfd_del(MemoryListener *listener,
3617 MemoryRegionSection *section,
3618 bool match_data, uint64_t data, int fd)
3622 static void io_begin(MemoryListener *listener)
3626 static void io_commit(MemoryListener *listener)
3630 static void io_region_add(MemoryListener *listener,
3631 MemoryRegionSection *section)
3633 iorange_init(&section->mr->iorange, &memory_region_iorange_ops,
3634 section->offset_within_address_space, section->size);
3635 ioport_register(&section->mr->iorange);
3638 static void io_region_del(MemoryListener *listener,
3639 MemoryRegionSection *section)
3641 isa_unassign_ioport(section->offset_within_address_space, section->size);
3644 static void io_region_nop(MemoryListener *listener,
3645 MemoryRegionSection *section)
3649 static void io_log_start(MemoryListener *listener,
3650 MemoryRegionSection *section)
3654 static void io_log_stop(MemoryListener *listener,
3655 MemoryRegionSection *section)
3659 static void io_log_sync(MemoryListener *listener,
3660 MemoryRegionSection *section)
3664 static void io_log_global_start(MemoryListener *listener)
3668 static void io_log_global_stop(MemoryListener *listener)
3672 static void io_eventfd_add(MemoryListener *listener,
3673 MemoryRegionSection *section,
3674 bool match_data, uint64_t data, int fd)
3678 static void io_eventfd_del(MemoryListener *listener,
3679 MemoryRegionSection *section,
3680 bool match_data, uint64_t data, int fd)
3684 static MemoryListener core_memory_listener = {
3685 .begin = core_begin,
3686 .commit = core_commit,
3687 .region_add = core_region_add,
3688 .region_del = core_region_del,
3689 .region_nop = core_region_nop,
3690 .log_start = core_log_start,
3691 .log_stop = core_log_stop,
3692 .log_sync = core_log_sync,
3693 .log_global_start = core_log_global_start,
3694 .log_global_stop = core_log_global_stop,
3695 .eventfd_add = core_eventfd_add,
3696 .eventfd_del = core_eventfd_del,
3697 .priority = 0,
3700 static MemoryListener io_memory_listener = {
3701 .begin = io_begin,
3702 .commit = io_commit,
3703 .region_add = io_region_add,
3704 .region_del = io_region_del,
3705 .region_nop = io_region_nop,
3706 .log_start = io_log_start,
3707 .log_stop = io_log_stop,
3708 .log_sync = io_log_sync,
3709 .log_global_start = io_log_global_start,
3710 .log_global_stop = io_log_global_stop,
3711 .eventfd_add = io_eventfd_add,
3712 .eventfd_del = io_eventfd_del,
3713 .priority = 0,
3716 static void memory_map_init(void)
3718 system_memory = g_malloc(sizeof(*system_memory));
3719 memory_region_init(system_memory, "system", INT64_MAX);
3720 set_system_memory_map(system_memory);
3722 system_io = g_malloc(sizeof(*system_io));
3723 memory_region_init(system_io, "io", 65536);
3724 set_system_io_map(system_io);
3726 memory_listener_register(&core_memory_listener, system_memory);
3727 memory_listener_register(&io_memory_listener, system_io);
3730 MemoryRegion *get_system_memory(void)
3732 return system_memory;
3735 MemoryRegion *get_system_io(void)
3737 return system_io;
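/* Illustrative sketch (hypothetical board code): regions added under
   get_system_memory() are what the listeners above flatten into the
   physical map.  The exact memory_region_init_ram() signature is assumed
   from this tree's memory.h and may differ in other versions. */
#if 0
    static MemoryRegion ram;

    memory_region_init_ram(&ram, "example.ram", 128 * 1024 * 1024);
    memory_region_add_subregion(get_system_memory(), 0, &ram);
#endif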
3740 #endif /* !defined(CONFIG_USER_ONLY) */
3742 /* physical memory access (slow version, mainly for debug) */
3743 #if defined(CONFIG_USER_ONLY)
3744 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3745 uint8_t *buf, int len, int is_write)
3747 int l, flags;
3748 target_ulong page;
3749 void * p;
3751 while (len > 0) {
3752 page = addr & TARGET_PAGE_MASK;
3753 l = (page + TARGET_PAGE_SIZE) - addr;
3754 if (l > len)
3755 l = len;
3756 flags = page_get_flags(page);
3757 if (!(flags & PAGE_VALID))
3758 return -1;
3759 if (is_write) {
3760 if (!(flags & PAGE_WRITE))
3761 return -1;
3762 /* XXX: this code should not depend on lock_user */
3763 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3764 return -1;
3765 memcpy(p, buf, l);
3766 unlock_user(p, addr, l);
3767 } else {
3768 if (!(flags & PAGE_READ))
3769 return -1;
3770 /* XXX: this code should not depend on lock_user */
3771 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3772 return -1;
3773 memcpy(buf, p, l);
3774 unlock_user(p, addr, 0);
3776 len -= l;
3777 buf += l;
3778 addr += l;
3780 return 0;
3783 #else
3784 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3785 int len, int is_write)
3787 int l, io_index;
3788 uint8_t *ptr;
3789 uint32_t val;
3790 target_phys_addr_t page;
3791 ram_addr_t pd;
3792 PhysPageDesc p;
3794 while (len > 0) {
3795 page = addr & TARGET_PAGE_MASK;
3796 l = (page + TARGET_PAGE_SIZE) - addr;
3797 if (l > len)
3798 l = len;
3799 p = phys_page_find(page >> TARGET_PAGE_BITS);
3800 pd = p.phys_offset;
3802 if (is_write) {
3803 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
3804 target_phys_addr_t addr1;
3805 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
3806 addr1 = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
3807 /* XXX: could force cpu_single_env to NULL to avoid
3808 potential bugs */
3809 if (l >= 4 && ((addr1 & 3) == 0)) {
3810 /* 32 bit write access */
3811 val = ldl_p(buf);
3812 io_mem_write(io_index, addr1, val, 4);
3813 l = 4;
3814 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3815 /* 16 bit write access */
3816 val = lduw_p(buf);
3817 io_mem_write(io_index, addr1, val, 2);
3818 l = 2;
3819 } else {
3820 /* 8 bit write access */
3821 val = ldub_p(buf);
3822 io_mem_write(io_index, addr1, val, 1);
3823 l = 1;
3825 } else {
3826 ram_addr_t addr1;
3827 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3828 /* RAM case */
3829 ptr = qemu_get_ram_ptr(addr1);
3830 memcpy(ptr, buf, l);
3831 if (!cpu_physical_memory_is_dirty(addr1)) {
3832 /* invalidate code */
3833 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3834 /* set dirty bit */
3835 cpu_physical_memory_set_dirty_flags(
3836 addr1, (0xff & ~CODE_DIRTY_FLAG));
3838 qemu_put_ram_ptr(ptr);
3840 } else {
3841 if (!is_ram_rom_romd(pd)) {
3842 target_phys_addr_t addr1;
3843 /* I/O case */
3844 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
3845 addr1 = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
3846 if (l >= 4 && ((addr1 & 3) == 0)) {
3847 /* 32 bit read access */
3848 val = io_mem_read(io_index, addr1, 4);
3849 stl_p(buf, val);
3850 l = 4;
3851 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3852 /* 16 bit read access */
3853 val = io_mem_read(io_index, addr1, 2);
3854 stw_p(buf, val);
3855 l = 2;
3856 } else {
3857 /* 8 bit read access */
3858 val = io_mem_read(io_index, addr1, 1);
3859 stb_p(buf, val);
3860 l = 1;
3862 } else {
3863 /* RAM case */
3864 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3865 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3866 qemu_put_ram_ptr(ptr);
3869 len -= l;
3870 buf += l;
3871 addr += l;
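/* Illustrative sketch (hypothetical device code): reading a guest-physical
   descriptor through this slow path; the structure layout and desc_paddr
   are invented. */
#if 0
    struct example_desc {
        uint32_t buf_addr_le;
        uint32_t buf_len_le;
    } desc;

    cpu_physical_memory_read(desc_paddr, (uint8_t *)&desc, sizeof(desc));
    uint32_t buf_addr = le32_to_cpu(desc.buf_addr_le);
    uint32_t buf_len  = le32_to_cpu(desc.buf_len_le);
#endif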
3875 /* used for ROM loading : can write in RAM and ROM */
3876 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3877 const uint8_t *buf, int len)
3879 int l;
3880 uint8_t *ptr;
3881 target_phys_addr_t page;
3882 unsigned long pd;
3883 PhysPageDesc p;
3885 while (len > 0) {
3886 page = addr & TARGET_PAGE_MASK;
3887 l = (page + TARGET_PAGE_SIZE) - addr;
3888 if (l > len)
3889 l = len;
3890 p = phys_page_find(page >> TARGET_PAGE_BITS);
3891 pd = p.phys_offset;
3893 if (!is_ram_rom_romd(pd)) {
3894 /* do nothing */
3895 } else {
3896 unsigned long addr1;
3897 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3898 /* ROM/RAM case */
3899 ptr = qemu_get_ram_ptr(addr1);
3900 memcpy(ptr, buf, l);
3901 qemu_put_ram_ptr(ptr);
3903 len -= l;
3904 buf += l;
3905 addr += l;
3909 typedef struct {
3910 void *buffer;
3911 target_phys_addr_t addr;
3912 target_phys_addr_t len;
3913 } BounceBuffer;
3915 static BounceBuffer bounce;
3917 typedef struct MapClient {
3918 void *opaque;
3919 void (*callback)(void *opaque);
3920 QLIST_ENTRY(MapClient) link;
3921 } MapClient;
3923 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3924 = QLIST_HEAD_INITIALIZER(map_client_list);
3926 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3928 MapClient *client = g_malloc(sizeof(*client));
3930 client->opaque = opaque;
3931 client->callback = callback;
3932 QLIST_INSERT_HEAD(&map_client_list, client, link);
3933 return client;
3936 void cpu_unregister_map_client(void *_client)
3938 MapClient *client = (MapClient *)_client;
3940 QLIST_REMOVE(client, link);
3941 g_free(client);
3944 static void cpu_notify_map_clients(void)
3946 MapClient *client;
3948 while (!QLIST_EMPTY(&map_client_list)) {
3949 client = QLIST_FIRST(&map_client_list);
3950 client->callback(client->opaque);
3951 cpu_unregister_map_client(client);
3955 /* Map a physical memory region into a host virtual address.
3956 * May map a subset of the requested range, given by and returned in *plen.
3957 * May return NULL if resources needed to perform the mapping are exhausted.
3958 * Use only for reads OR writes - not for read-modify-write operations.
3959 * Use cpu_register_map_client() to know when retrying the map operation is
3960 * likely to succeed.
3962 void *cpu_physical_memory_map(target_phys_addr_t addr,
3963 target_phys_addr_t *plen,
3964 int is_write)
3966 target_phys_addr_t len = *plen;
3967 target_phys_addr_t todo = 0;
3968 int l;
3969 target_phys_addr_t page;
3970 unsigned long pd;
3971 PhysPageDesc p;
3972 ram_addr_t raddr = RAM_ADDR_MAX;
3973 ram_addr_t rlen;
3974 void *ret;
3976 while (len > 0) {
3977 page = addr & TARGET_PAGE_MASK;
3978 l = (page + TARGET_PAGE_SIZE) - addr;
3979 if (l > len)
3980 l = len;
3981 p = phys_page_find(page >> TARGET_PAGE_BITS);
3982 pd = p.phys_offset;
3984 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
3985 if (todo || bounce.buffer) {
3986 break;
3988 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3989 bounce.addr = addr;
3990 bounce.len = l;
3991 if (!is_write) {
3992 cpu_physical_memory_read(addr, bounce.buffer, l);
3995 *plen = l;
3996 return bounce.buffer;
3998 if (!todo) {
3999 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4002 len -= l;
4003 addr += l;
4004 todo += l;
4006 rlen = todo;
4007 ret = qemu_ram_ptr_length(raddr, &rlen);
4008 *plen = rlen;
4009 return ret;
4012 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4013 * Will also mark the memory as dirty if is_write == 1. access_len gives
4014 * the amount of memory that was actually read or written by the caller.
4016 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4017 int is_write, target_phys_addr_t access_len)
4019 if (buffer != bounce.buffer) {
4020 if (is_write) {
4021 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4022 while (access_len) {
4023 unsigned l;
4024 l = TARGET_PAGE_SIZE;
4025 if (l > access_len)
4026 l = access_len;
4027 if (!cpu_physical_memory_is_dirty(addr1)) {
4028 /* invalidate code */
4029 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4030 /* set dirty bit */
4031 cpu_physical_memory_set_dirty_flags(
4032 addr1, (0xff & ~CODE_DIRTY_FLAG));
4034 addr1 += l;
4035 access_len -= l;
4038 if (xen_enabled()) {
4039 xen_invalidate_map_cache_entry(buffer);
4041 return;
4043 if (is_write) {
4044 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4046 qemu_vfree(bounce.buffer);
4047 bounce.buffer = NULL;
4048 cpu_notify_map_clients();
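/* Illustrative sketch (hypothetical device code; dma_addr, dma_len, data,
   opaque and example_retry_dma are invented): the intended pattern is
   map -> access -> unmap, falling back to cpu_register_map_client() when
   the single bounce buffer is already in use: */
#if 0
    target_phys_addr_t plen = dma_len;
    void *host = cpu_physical_memory_map(dma_addr, &plen, 1 /* is_write */);

    if (!host) {
        /* bounce buffer busy: ask to be notified, then retry the transfer */
        cpu_register_map_client(opaque, example_retry_dma);
    } else {
        memcpy(host, data, plen);     /* plen may be shorter than dma_len */
        cpu_physical_memory_unmap(host, plen, 1, plen);
    }
#endif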
4051 /* warning: addr must be aligned */
4052 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4053 enum device_endian endian)
4055 int io_index;
4056 uint8_t *ptr;
4057 uint32_t val;
4058 unsigned long pd;
4059 PhysPageDesc p;
4061 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4062 pd = p.phys_offset;
4064 if (!is_ram_rom_romd(pd)) {
4065 /* I/O case */
4066 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
4067 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4068 val = io_mem_read(io_index, addr, 4);
4069 #if defined(TARGET_WORDS_BIGENDIAN)
4070 if (endian == DEVICE_LITTLE_ENDIAN) {
4071 val = bswap32(val);
4073 #else
4074 if (endian == DEVICE_BIG_ENDIAN) {
4075 val = bswap32(val);
4077 #endif
4078 } else {
4079 /* RAM case */
4080 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4081 (addr & ~TARGET_PAGE_MASK);
4082 switch (endian) {
4083 case DEVICE_LITTLE_ENDIAN:
4084 val = ldl_le_p(ptr);
4085 break;
4086 case DEVICE_BIG_ENDIAN:
4087 val = ldl_be_p(ptr);
4088 break;
4089 default:
4090 val = ldl_p(ptr);
4091 break;
4094 return val;
4097 uint32_t ldl_phys(target_phys_addr_t addr)
4099 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4102 uint32_t ldl_le_phys(target_phys_addr_t addr)
4104 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4107 uint32_t ldl_be_phys(target_phys_addr_t addr)
4109 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4112 /* warning: addr must be aligned */
4113 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4114 enum device_endian endian)
4116 int io_index;
4117 uint8_t *ptr;
4118 uint64_t val;
4119 unsigned long pd;
4120 PhysPageDesc p;
4122 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4123 pd = p.phys_offset;
4125 if (!is_ram_rom_romd(pd)) {
4126 /* I/O case */
4127 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
4128 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4130 /* XXX This is broken when device endian != cpu endian.
4131 Fix and add "endian" variable check */
4132 #ifdef TARGET_WORDS_BIGENDIAN
4133 val = io_mem_read(io_index, addr, 4) << 32;
4134 val |= io_mem_read(io_index, addr + 4, 4);
4135 #else
4136 val = io_mem_read(io_index, addr, 4);
4137 val |= io_mem_read(io_index, addr + 4, 4) << 32;
4138 #endif
4139 } else {
4140 /* RAM case */
4141 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4142 (addr & ~TARGET_PAGE_MASK);
4143 switch (endian) {
4144 case DEVICE_LITTLE_ENDIAN:
4145 val = ldq_le_p(ptr);
4146 break;
4147 case DEVICE_BIG_ENDIAN:
4148 val = ldq_be_p(ptr);
4149 break;
4150 default:
4151 val = ldq_p(ptr);
4152 break;
4155 return val;
4158 uint64_t ldq_phys(target_phys_addr_t addr)
4160 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4163 uint64_t ldq_le_phys(target_phys_addr_t addr)
4165 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4168 uint64_t ldq_be_phys(target_phys_addr_t addr)
4170 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4173 /* XXX: optimize */
4174 uint32_t ldub_phys(target_phys_addr_t addr)
4176 uint8_t val;
4177 cpu_physical_memory_read(addr, &val, 1);
4178 return val;
4181 /* warning: addr must be aligned */
4182 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4183 enum device_endian endian)
4185 int io_index;
4186 uint8_t *ptr;
4187 uint64_t val;
4188 unsigned long pd;
4189 PhysPageDesc p;
4191 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4192 pd = p.phys_offset;
4194 if (!is_ram_rom_romd(pd)) {
4195 /* I/O case */
4196 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
4197 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4198 val = io_mem_read(io_index, addr, 2);
4199 #if defined(TARGET_WORDS_BIGENDIAN)
4200 if (endian == DEVICE_LITTLE_ENDIAN) {
4201 val = bswap16(val);
4203 #else
4204 if (endian == DEVICE_BIG_ENDIAN) {
4205 val = bswap16(val);
4207 #endif
4208 } else {
4209 /* RAM case */
4210 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4211 (addr & ~TARGET_PAGE_MASK);
4212 switch (endian) {
4213 case DEVICE_LITTLE_ENDIAN:
4214 val = lduw_le_p(ptr);
4215 break;
4216 case DEVICE_BIG_ENDIAN:
4217 val = lduw_be_p(ptr);
4218 break;
4219 default:
4220 val = lduw_p(ptr);
4221 break;
4224 return val;
4227 uint32_t lduw_phys(target_phys_addr_t addr)
4229 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4232 uint32_t lduw_le_phys(target_phys_addr_t addr)
4234 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4237 uint32_t lduw_be_phys(target_phys_addr_t addr)
4239 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4242 /* warning: addr must be aligned. The ram page is not masked as dirty
4243 and the code inside is not invalidated. It is useful if the dirty
4244 bits are used to track modified PTEs */
4245 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4247 int io_index;
4248 uint8_t *ptr;
4249 unsigned long pd;
4250 PhysPageDesc p;
4252 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4253 pd = p.phys_offset;
4255 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4256 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
4257 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4258 io_mem_write(io_index, addr, val, 4);
4259 } else {
4260 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4261 ptr = qemu_get_ram_ptr(addr1);
4262 stl_p(ptr, val);
4264 if (unlikely(in_migration)) {
4265 if (!cpu_physical_memory_is_dirty(addr1)) {
4266 /* invalidate code */
4267 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4268 /* set dirty bit */
4269 cpu_physical_memory_set_dirty_flags(
4270 addr1, (0xff & ~CODE_DIRTY_FLAG));
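/* Editor's note: illustrative sketch, not part of the original exec.c.
 * As the comment on stl_phys_notdirty() explains, it is meant for target
 * MMU helpers that update page-table entries: the store must not set the
 * page's dirty flags, so that a scheme which uses those flags to detect
 * guest modifications of PTEs is not fooled by QEMU's own update.  The
 * PTE bit and helper name below are hypothetical. */
#if 0
#define EXAMPLE_PTE_ACCESSED 0x20

static void example_set_pte_accessed(target_phys_addr_t pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        /* update the PTE without marking its RAM page dirty */
        stl_phys_notdirty(pte_addr, pte | EXAMPLE_PTE_ACCESSED);
    }
}
#endif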
4276 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4278 int io_index;
4279 uint8_t *ptr;
4280 unsigned long pd;
4281 PhysPageDesc p;
4283 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4284 pd = p.phys_offset;
4286 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4287 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
4288 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4289 #ifdef TARGET_WORDS_BIGENDIAN
4290 io_mem_write(io_index, addr, val >> 32, 4);
4291 io_mem_write(io_index, addr + 4, (uint32_t)val, 4);
4292 #else
4293 io_mem_write(io_index, addr, (uint32_t)val, 4);
4294 io_mem_write(io_index, addr + 4, val >> 32, 4);
4295 #endif
4296 } else {
4297 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4298 (addr & ~TARGET_PAGE_MASK);
4299 stq_p(ptr, val);
4303 /* warning: addr must be aligned */
4304 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4305 enum device_endian endian)
4307 int io_index;
4308 uint8_t *ptr;
4309 unsigned long pd;
4310 PhysPageDesc p;
4312 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4313 pd = p.phys_offset;
4315 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4316 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
4317 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4318 #if defined(TARGET_WORDS_BIGENDIAN)
4319 if (endian == DEVICE_LITTLE_ENDIAN) {
4320 val = bswap32(val);
4322 #else
4323 if (endian == DEVICE_BIG_ENDIAN) {
4324 val = bswap32(val);
4326 #endif
4327 io_mem_write(io_index, addr, val, 4);
4328 } else {
4329 unsigned long addr1;
4330 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4331 /* RAM case */
4332 ptr = qemu_get_ram_ptr(addr1);
4333 switch (endian) {
4334 case DEVICE_LITTLE_ENDIAN:
4335 stl_le_p(ptr, val);
4336 break;
4337 case DEVICE_BIG_ENDIAN:
4338 stl_be_p(ptr, val);
4339 break;
4340 default:
4341 stl_p(ptr, val);
4342 break;
4344 if (!cpu_physical_memory_is_dirty(addr1)) {
4345 /* invalidate code */
4346 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4347 /* set dirty bit */
4348 cpu_physical_memory_set_dirty_flags(addr1,
4349 (0xff & ~CODE_DIRTY_FLAG));
4354 void stl_phys(target_phys_addr_t addr, uint32_t val)
4356 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4359 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4361 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4364 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4366 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4369 /* XXX: optimize */
4370 void stb_phys(target_phys_addr_t addr, uint32_t val)
4372 uint8_t v = val;
4373 cpu_physical_memory_write(addr, &v, 1);
4376 /* warning: addr must be aligned */
4377 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4378 enum device_endian endian)
4380 int io_index;
4381 uint8_t *ptr;
4382 unsigned long pd;
4383 PhysPageDesc p;
4385 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4386 pd = p.phys_offset;
4388 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4389 io_index = pd & (IO_MEM_NB_ENTRIES - 1);
4390 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4391 #if defined(TARGET_WORDS_BIGENDIAN)
4392 if (endian == DEVICE_LITTLE_ENDIAN) {
4393 val = bswap16(val);
4395 #else
4396 if (endian == DEVICE_BIG_ENDIAN) {
4397 val = bswap16(val);
4399 #endif
4400 io_mem_write(io_index, addr, val, 2);
4401 } else {
4402 unsigned long addr1;
4403 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4404 /* RAM case */
4405 ptr = qemu_get_ram_ptr(addr1);
4406 switch (endian) {
4407 case DEVICE_LITTLE_ENDIAN:
4408 stw_le_p(ptr, val);
4409 break;
4410 case DEVICE_BIG_ENDIAN:
4411 stw_be_p(ptr, val);
4412 break;
4413 default:
4414 stw_p(ptr, val);
4415 break;
4417 if (!cpu_physical_memory_is_dirty(addr1)) {
4418 /* invalidate code */
4419 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4420 /* set dirty bit */
4421 cpu_physical_memory_set_dirty_flags(addr1,
4422 (0xff & ~CODE_DIRTY_FLAG));
4427 void stw_phys(target_phys_addr_t addr, uint32_t val)
4429 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4432 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4434 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4437 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4439 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4442 /* XXX: optimize */
4443 void stq_phys(target_phys_addr_t addr, uint64_t val)
4445 val = tswap64(val);
4446 cpu_physical_memory_write(addr, &val, 8);
4449 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4451 val = cpu_to_le64(val);
4452 cpu_physical_memory_write(addr, &val, 8);
4455 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4457 val = cpu_to_be64(val);
4458 cpu_physical_memory_write(addr, &val, 8);
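/* Editor's note: illustrative sketch, not part of the original exec.c.
 * stq_phys() stores in target byte order (tswap64), while stq_le_phys()
 * and stq_be_phys() store in a fixed byte order.  A device whose in-memory
 * descriptors are specified as little-endian would therefore use the _le
 * variant regardless of the target; the helper below is hypothetical. */
#if 0
static void example_write_desc_addr(target_phys_addr_t desc_addr,
                                    uint64_t buf_addr)
{
    /* the descriptor field is little-endian by the device specification */
    stq_le_phys(desc_addr, buf_addr);
}
#endif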
4461 /* virtual memory access for debug (includes writing to ROM) */
4462 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4463 uint8_t *buf, int len, int is_write)
4465 int l;
4466 target_phys_addr_t phys_addr;
4467 target_ulong page;
4469 while (len > 0) {
4470 page = addr & TARGET_PAGE_MASK;
4471 phys_addr = cpu_get_phys_page_debug(env, page);
4472 /* if no physical page mapped, return an error */
4473 if (phys_addr == -1)
4474 return -1;
4475 l = (page + TARGET_PAGE_SIZE) - addr;
4476 if (l > len)
4477 l = len;
4478 phys_addr += (addr & ~TARGET_PAGE_MASK);
4479 if (is_write)
4480 cpu_physical_memory_write_rom(phys_addr, buf, l);
4481 else
4482 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4483 len -= l;
4484 buf += l;
4485 addr += l;
4487 return 0;
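/* Editor's note: illustrative sketch, not part of the original exec.c.
 * cpu_memory_rw_debug() resolves the guest virtual address page by page
 * via cpu_get_phys_page_debug(), so a debugger front end can read or patch
 * guest memory without any TLB entry being present; writes go through
 * cpu_physical_memory_write_rom() so that breakpoints can be planted in
 * ROM as well.  The wrapper below is hypothetical. */
#if 0
static bool example_read_guest_u32(CPUState *env, target_ulong vaddr,
                                   uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;           /* no physical page mapped at vaddr */
    }
    *out = ldl_p(buf);          /* interpret the bytes in target order */
    return true;
}
#endif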
4489 #endif
4491 /* in deterministic execution mode, instructions doing device I/Os
4492 must be at the end of the TB */
4493 void cpu_io_recompile(CPUState *env, void *retaddr)
4495 TranslationBlock *tb;
4496 uint32_t n, cflags;
4497 target_ulong pc, cs_base;
4498 uint64_t flags;
4500 tb = tb_find_pc((unsigned long)retaddr);
4501 if (!tb) {
4502 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4503 retaddr);
4505 n = env->icount_decr.u16.low + tb->icount;
4506 cpu_restore_state(tb, env, (unsigned long)retaddr);
4507 /* Calculate how many instructions had been executed before the fault
4508 occurred. */
4509 n = n - env->icount_decr.u16.low;
4510 /* Generate a new TB ending on the I/O insn. */
4511 n++;
4512 /* On MIPS and SH, delay slot instructions can only be restarted if
4513 they were already the first instruction in the TB. If this is not
4514 the first instruction in a TB then re-execute the preceding
4515 branch. */
4516 #if defined(TARGET_MIPS)
4517 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4518 env->active_tc.PC -= 4;
4519 env->icount_decr.u16.low++;
4520 env->hflags &= ~MIPS_HFLAG_BMASK;
4522 #elif defined(TARGET_SH4)
4523 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4524 && n > 1) {
4525 env->pc -= 2;
4526 env->icount_decr.u16.low++;
4527 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4529 #endif
4530 /* This should never happen. */
4531 if (n > CF_COUNT_MASK)
4532 cpu_abort(env, "TB too big during recompile");
4534 cflags = n | CF_LAST_IO;
4535 pc = tb->pc;
4536 cs_base = tb->cs_base;
4537 flags = tb->flags;
4538 tb_phys_invalidate(tb, -1);
4539 /* FIXME: In theory this could raise an exception. In practice
4540 we have already translated the block once so it's probably ok. */
4541 tb_gen_code(env, pc, cs_base, flags, cflags);
4542 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4543 the first in the TB) then we end up generating a whole new TB and
4544 repeating the fault, which is horribly inefficient.
4545 Better would be to execute just this insn uncached, or generate a
4546 second new TB. */
4547 cpu_resume_from_signal(env, NULL);
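/* Editor's note: not part of the original exec.c.  The regenerated TB is
 * created with cflags = n | CF_LAST_IO: the CF_COUNT_MASK bits cap the
 * block at n instructions and CF_LAST_IO marks that its final instruction
 * may perform I/O, so on re-execution the device access happens as the
 * last instruction of the block and icount-based deterministic execution
 * stays consistent. */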
4550 #if !defined(CONFIG_USER_ONLY)
4552 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4554 int i, target_code_size, max_target_code_size;
4555 int direct_jmp_count, direct_jmp2_count, cross_page;
4556 TranslationBlock *tb;
4558 target_code_size = 0;
4559 max_target_code_size = 0;
4560 cross_page = 0;
4561 direct_jmp_count = 0;
4562 direct_jmp2_count = 0;
4563 for(i = 0; i < nb_tbs; i++) {
4564 tb = &tbs[i];
4565 target_code_size += tb->size;
4566 if (tb->size > max_target_code_size)
4567 max_target_code_size = tb->size;
4568 if (tb->page_addr[1] != -1)
4569 cross_page++;
4570 if (tb->tb_next_offset[0] != 0xffff) {
4571 direct_jmp_count++;
4572 if (tb->tb_next_offset[1] != 0xffff) {
4573 direct_jmp2_count++;
4577 /* XXX: avoid using doubles ? */
4578 cpu_fprintf(f, "Translation buffer state:\n");
4579 cpu_fprintf(f, "gen code size %td/%ld\n",
4580 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4581 cpu_fprintf(f, "TB count %d/%d\n",
4582 nb_tbs, code_gen_max_blocks);
4583 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4584 nb_tbs ? target_code_size / nb_tbs : 0,
4585 max_target_code_size);
4586 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4587 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4588 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4589 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4590 cross_page,
4591 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4592 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4593 direct_jmp_count,
4594 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4595 direct_jmp2_count,
4596 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4597 cpu_fprintf(f, "\nStatistics:\n");
4598 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4599 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4600 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4601 tcg_dump_info(f, cpu_fprintf);
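/* Editor's note: not part of the original exec.c.  dump_exec_info() is
 * normally reached from the human monitor's "info jit" command, which is a
 * convenient way to inspect translation-buffer usage, TB counts and the
 * flush/invalidate statistics printed above at run time, e.g.:
 *
 *   (qemu) info jit
 */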
4604 /* NOTE: this function can trigger an exception */
4605 /* NOTE2: the returned address is not exactly the physical address: it
4606    is a ram_addr_t offset within guest RAM (see qemu_ram_addr_from_host_nofail) */
4607 tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
4609 int mmu_idx, page_index, pd;
4610 void *p;
4612 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
4613 mmu_idx = cpu_mmu_index(env1);
4614 if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
4615 (addr & TARGET_PAGE_MASK))) {
4616 ldub_code(addr);
4618 pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
4619 if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
4620 && !is_romd(pd)) {
4621 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
4622 cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
4623 #else
4624 cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
4625 #endif
4627 p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
4628 return qemu_ram_addr_from_host_nofail(p);
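/* Editor's note: illustrative sketch, not part of the original exec.c.
 * The returned tb_page_addr_t is what the TB machinery hashes on; a caller
 * on the translation path typically uses it roughly as below (simplified
 * from the tb_find_slow()-style lookup in cpu-exec.c, which additionally
 * walks the hash chain comparing pc, cs_base and flags): */
#if 0
static TranslationBlock *example_first_tb_candidate(CPUState *env,
                                                    target_ulong pc)
{
    tb_page_addr_t phys_pc = get_page_addr_code(env, pc);
    unsigned int h = tb_phys_hash_func(phys_pc);

    return tb_phys_hash[h];     /* head of the physical-hash chain */
}
#endif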
4632 * A helper function for the _utterly broken_ virtio device model to find out if
4633 * it's running on a big endian machine. Don't do this at home kids!
4635 bool virtio_is_big_endian(void);
4636 bool virtio_is_big_endian(void)
4638 #if defined(TARGET_WORDS_BIGENDIAN)
4639 return true;
4640 #else
4641 return false;
4642 #endif
4645 #define MMUSUFFIX _cmmu
4646 #undef GETPC
4647 #define GETPC() NULL
4648 #define env cpu_single_env
4649 #define SOFTMMU_CODE_ACCESS
4651 #define SHIFT 0
4652 #include "softmmu_template.h"
4654 #define SHIFT 1
4655 #include "softmmu_template.h"
4657 #define SHIFT 2
4658 #include "softmmu_template.h"
4660 #define SHIFT 3
4661 #include "softmmu_template.h"
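/* Editor's note: not part of the original exec.c.  Each inclusion of
 * softmmu_template.h above instantiates the slow-path code-fetch accessors
 * for one access size (1 << SHIFT bytes, i.e. 1, 2, 4 and 8) with the
 * _cmmu suffix selected by MMUSUFFIX and with SOFTMMU_CODE_ACCESS defined,
 * so the generated helpers back the ldub_code()/ldl_code()-style fetches
 * used above in get_page_addr_code() and by the TCG front ends. */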
4663 #undef env
4665 #endif