memory: switch phys_page_set() to a recursive implementation
[qemu/kevin.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to the code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
122 static MemoryRegion io_mem_subpage_ram;
124 #endif
126 CPUState *first_cpu;
127 /* current CPU in the current thread. It is only valid inside
128 cpu_exec() */
129 DEFINE_TLS(CPUState *,cpu_single_env);
130 /* 0 = Do not count executed instructions.
131 1 = Precise instruction counting.
132 2 = Adaptive rate instruction counting. */
133 int use_icount = 0;
135 typedef struct PageDesc {
136 /* list of TBs intersecting this ram page */
137 TranslationBlock *first_tb;
138 /* in order to optimize self-modifying code handling, we count the
139 number of lookups we do on a given page so we can switch to a bitmap */
140 unsigned int code_write_count;
141 uint8_t *code_bitmap;
142 #if defined(CONFIG_USER_ONLY)
143 unsigned long flags;
144 #endif
145 } PageDesc;
147 /* In system mode we want L1_MAP to be based on ram offsets,
148 while in user mode we want it to be based on virtual addresses. */
149 #if !defined(CONFIG_USER_ONLY)
150 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
151 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
154 #endif
155 #else
156 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
157 #endif
159 /* Size of the L2 (and L3, etc) page tables. */
160 #define L2_BITS 10
161 #define L2_SIZE (1 << L2_BITS)
163 #define P_L2_LEVELS \
164 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
166 /* The bits remaining after N lower levels of page tables. */
167 #define V_L1_BITS_REM \
168 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
178 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
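
/*
 * Illustrative sketch (not QEMU code): how the macros above carve a page
 * index into radix-tree indexes - a top level of V_L1_BITS plus repeated
 * L2_BITS levels.  The EX_* constants are hypothetical stand-ins for
 * TARGET_PAGE_BITS, L2_BITS and L1_MAP_ADDR_SPACE_BITS; compile this
 * separately to experiment.
 */
#include <stdio.h>

#define EX_PAGE_BITS       12          /* 4 KiB pages */
#define EX_L2_BITS         10          /* 1024 entries per level */
#define EX_ADDR_SPACE_BITS 36          /* example address-space width */

#define EX_L1_BITS_REM ((EX_ADDR_SPACE_BITS - EX_PAGE_BITS) % EX_L2_BITS)
#define EX_L1_BITS     (EX_L1_BITS_REM < 4 ? EX_L1_BITS_REM + EX_L2_BITS \
                                           : EX_L1_BITS_REM)
#define EX_L1_SHIFT    (EX_ADDR_SPACE_BITS - EX_PAGE_BITS - EX_L1_BITS)

int main(void)
{
    unsigned long long addr = 0x123456789ULL;
    unsigned long long index = addr >> EX_PAGE_BITS;    /* page number */
    int level;

    printf("L1 index: %llu\n",
           (index >> EX_L1_SHIFT) & ((1 << EX_L1_BITS) - 1));
    for (level = EX_L1_SHIFT / EX_L2_BITS - 1; level >= 0; level--) {
        printf("L%d index: %llu\n", EX_L1_SHIFT / EX_L2_BITS - level + 1,
               (index >> (level * EX_L2_BITS)) & ((1 << EX_L2_BITS) - 1));
    }
    return 0;
}
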
180 unsigned long qemu_real_host_page_size;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageEntry PhysPageEntry;
191 static MemoryRegionSection *phys_sections;
192 static unsigned phys_sections_nb, phys_sections_nb_alloc;
193 static uint16_t phys_section_unassigned;
195 struct PhysPageEntry {
196 union {
197 uint16_t leaf; /* index into phys_sections */
198 uint16_t node; /* index into phys_map_nodes */
199 } u;
200 };
202 /* Simple allocator for PhysPageEntry nodes */
203 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
204 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
206 #define PHYS_MAP_NODE_NIL ((uint16_t)~0)
208 /* This is a multi-level map on the physical address space.
209 The bottom level has pointers to MemoryRegionSections. */
210 static PhysPageEntry phys_map = { .u.node = PHYS_MAP_NODE_NIL };
212 static void io_mem_init(void);
213 static void memory_map_init(void);
215 /* io memory support */
216 MemoryRegion *io_mem_region[IO_MEM_NB_ENTRIES];
217 static char io_mem_used[IO_MEM_NB_ENTRIES];
218 static MemoryRegion io_mem_watch;
219 #endif
221 /* log support */
222 #ifdef WIN32
223 static const char *logfilename = "qemu.log";
224 #else
225 static const char *logfilename = "/tmp/qemu.log";
226 #endif
227 FILE *logfile;
228 int loglevel;
229 static int log_append = 0;
231 /* statistics */
232 #if !defined(CONFIG_USER_ONLY)
233 static int tlb_flush_count;
234 #endif
235 static int tb_flush_count;
236 static int tb_phys_invalidate_count;
238 #ifdef _WIN32
239 static void map_exec(void *addr, long size)
241 DWORD old_protect;
242 VirtualProtect(addr, size,
243 PAGE_EXECUTE_READWRITE, &old_protect);
246 #else
247 static void map_exec(void *addr, long size)
249 unsigned long start, end, page_size;
251 page_size = getpagesize();
252 start = (unsigned long)addr;
253 start &= ~(page_size - 1);
255 end = (unsigned long)addr + size;
256 end += page_size - 1;
257 end &= ~(page_size - 1);
259 mprotect((void *)start, end - start,
260 PROT_READ | PROT_WRITE | PROT_EXEC);
262 #endif
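
/*
 * Illustrative sketch (not QEMU code): the page-rounding arithmetic used
 * by map_exec() above, pulled out on its own.  The range [addr, addr +
 * size) is rounded outward to host-page boundaries, which is what
 * mprotect()/VirtualProtect() operate on.  Compile separately.
 */
#include <stdio.h>

static void ex_round_to_pages(unsigned long addr, unsigned long size,
                              unsigned long page_size,
                              unsigned long *start, unsigned long *end)
{
    *start = addr & ~(page_size - 1);                        /* round down */
    *end = (addr + size + page_size - 1) & ~(page_size - 1); /* round up */
}

int main(void)
{
    unsigned long start, end;

    ex_round_to_pages(0x1234, 0x20, 0x1000, &start, &end);
    printf("start=0x%lx end=0x%lx\n", start, end);   /* 0x1000 .. 0x2000 */
    return 0;
}
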
264 static void page_init(void)
266 /* NOTE: we can always suppose that qemu_host_page_size >=
267 TARGET_PAGE_SIZE */
268 #ifdef _WIN32
270 SYSTEM_INFO system_info;
272 GetSystemInfo(&system_info);
273 qemu_real_host_page_size = system_info.dwPageSize;
275 #else
276 qemu_real_host_page_size = getpagesize();
277 #endif
278 if (qemu_host_page_size == 0)
279 qemu_host_page_size = qemu_real_host_page_size;
280 if (qemu_host_page_size < TARGET_PAGE_SIZE)
281 qemu_host_page_size = TARGET_PAGE_SIZE;
282 qemu_host_page_mask = ~(qemu_host_page_size - 1);
284 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
286 #ifdef HAVE_KINFO_GETVMMAP
287 struct kinfo_vmentry *freep;
288 int i, cnt;
290 freep = kinfo_getvmmap(getpid(), &cnt);
291 if (freep) {
292 mmap_lock();
293 for (i = 0; i < cnt; i++) {
294 unsigned long startaddr, endaddr;
296 startaddr = freep[i].kve_start;
297 endaddr = freep[i].kve_end;
298 if (h2g_valid(startaddr)) {
299 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
301 if (h2g_valid(endaddr)) {
302 endaddr = h2g(endaddr);
303 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
304 } else {
305 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
306 endaddr = ~0ul;
307 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
308 #endif
312 free(freep);
313 mmap_unlock();
315 #else
316 FILE *f;
318 last_brk = (unsigned long)sbrk(0);
320 f = fopen("/compat/linux/proc/self/maps", "r");
321 if (f) {
322 mmap_lock();
324 do {
325 unsigned long startaddr, endaddr;
326 int n;
328 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
330 if (n == 2 && h2g_valid(startaddr)) {
331 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
333 if (h2g_valid(endaddr)) {
334 endaddr = h2g(endaddr);
335 } else {
336 endaddr = ~0ul;
338 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
340 } while (!feof(f));
342 fclose(f);
343 mmap_unlock();
345 #endif
347 #endif
350 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
352 PageDesc *pd;
353 void **lp;
354 int i;
356 #if defined(CONFIG_USER_ONLY)
357 /* We can't use g_malloc because it may recurse into a locked mutex. */
358 # define ALLOC(P, SIZE) \
359 do { \
360 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
361 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
362 } while (0)
363 #else
364 # define ALLOC(P, SIZE) \
365 do { P = g_malloc0(SIZE); } while (0)
366 #endif
368 /* Level 1. Always allocated. */
369 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
371 /* Level 2..N-1. */
372 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
373 void **p = *lp;
375 if (p == NULL) {
376 if (!alloc) {
377 return NULL;
379 ALLOC(p, sizeof(void *) * L2_SIZE);
380 *lp = p;
383 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
386 pd = *lp;
387 if (pd == NULL) {
388 if (!alloc) {
389 return NULL;
391 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
392 *lp = pd;
395 #undef ALLOC
397 return pd + (index & (L2_SIZE - 1));
400 static inline PageDesc *page_find(tb_page_addr_t index)
402 return page_find_alloc(index, 0);
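
/*
 * Illustrative sketch (not QEMU code): the lazy-allocation pattern used
 * by page_find_alloc() above, reduced to a fixed two-level table covering
 * 20 bits of page index.  All names (ex_l1, ex_find_alloc, ...) are
 * hypothetical; the point is the shape of the walk - a missing leaf table
 * is allocated only when 'alloc' is set, otherwise the lookup returns
 * NULL.  Compile separately.
 */
#include <stdlib.h>

#define EX_L1_BITS 10
#define EX_L2_BITS 10
#define EX_L1_SIZE (1 << EX_L1_BITS)
#define EX_L2_SIZE (1 << EX_L2_BITS)

typedef struct { void *user_data; } ExPageDesc;

static ExPageDesc *ex_l1[EX_L1_SIZE];      /* level 1: always present */

static ExPageDesc *ex_find_alloc(unsigned long index, int alloc)
{
    ExPageDesc **lp = &ex_l1[(index >> EX_L2_BITS) & (EX_L1_SIZE - 1)];
    ExPageDesc *pd = *lp;

    if (pd == NULL) {
        if (!alloc) {
            return NULL;                   /* pure lookup: nothing there */
        }
        pd = calloc(EX_L2_SIZE, sizeof(*pd));
        if (!pd) {
            abort();                       /* out of memory */
        }
        *lp = pd;                          /* install the new leaf table */
    }
    return pd + (index & (EX_L2_SIZE - 1));
}

int main(void)
{
    ExPageDesc *pd = ex_find_alloc(0x12345, 1);

    pd->user_data = pd;                    /* touch the new leaf entry */
    return ex_find_alloc(0x99999, 0) == NULL ? 0 : 1;
}
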
405 #if !defined(CONFIG_USER_ONLY)
407 static void phys_map_node_reserve(unsigned nodes)
409 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
410 typedef PhysPageEntry Node[L2_SIZE];
411 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
412 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
413 phys_map_nodes_nb + nodes);
414 phys_map_nodes = g_renew(Node, phys_map_nodes,
415 phys_map_nodes_nb_alloc);
419 static uint16_t phys_map_node_alloc(void)
421 unsigned i;
422 uint16_t ret;
424 ret = phys_map_nodes_nb++;
425 assert(ret != PHYS_MAP_NODE_NIL);
426 assert(ret != phys_map_nodes_nb_alloc);
427 for (i = 0; i < L2_SIZE; ++i) {
428 phys_map_nodes[ret][i].u.node = PHYS_MAP_NODE_NIL;
430 return ret;
433 static void phys_map_nodes_reset(void)
435 phys_map_nodes_nb = 0;
439 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t index,
440 uint16_t leaf, int level)
442 PhysPageEntry *p;
443 int i;
445 if (lp->u.node == PHYS_MAP_NODE_NIL) {
446 lp->u.node = phys_map_node_alloc();
447 p = phys_map_nodes[lp->u.node];
448 if (level == 0) {
449 for (i = 0; i < L2_SIZE; i++) {
450 p[i].u.leaf = phys_section_unassigned;
453 } else {
454 p = phys_map_nodes[lp->u.node];
456 lp = &p[(index >> (level * L2_BITS)) & (L2_SIZE - 1)];
458 if (level == 0) {
459 lp->u.leaf = leaf;
460 } else {
461 phys_page_set_level(lp, index, leaf, level - 1);
465 static void phys_page_set(target_phys_addr_t index, uint16_t leaf)
467 phys_map_node_reserve(P_L2_LEVELS);
469 phys_page_set_level(&phys_map, index, leaf, P_L2_LEVELS - 1);
472 static MemoryRegionSection phys_page_find(target_phys_addr_t index)
474 PhysPageEntry lp = phys_map;
475 PhysPageEntry *p;
476 int i;
477 MemoryRegionSection section;
478 target_phys_addr_t delta;
479 uint16_t s_index = phys_section_unassigned;
481 for (i = P_L2_LEVELS - 1; i >= 0; i--) {
482 if (lp.u.node == PHYS_MAP_NODE_NIL) {
483 goto not_found;
485 p = phys_map_nodes[lp.u.node];
486 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
489 s_index = lp.u.leaf;
490 not_found:
491 section = phys_sections[s_index];
492 index <<= TARGET_PAGE_BITS;
493 assert(section.offset_within_address_space <= index
494 && index <= section.offset_within_address_space + section.size-1);
495 delta = index - section.offset_within_address_space;
496 section.offset_within_address_space += delta;
497 section.offset_within_region += delta;
498 section.size -= delta;
499 return section;
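
/*
 * Illustrative sketch (not QEMU code) of the scheme used by
 * phys_page_set()/phys_page_set_level() and the walk-down part of
 * phys_page_find() above: interior nodes live in one flat, growable pool
 * and are referred to by 16-bit indexes instead of pointers, capacity for
 * a full walk is reserved up front so no reallocation can happen
 * mid-recursion, and setting a leaf descends one level per recursive
 * call, allocating nodes on demand.  All names and sizes below are
 * hypothetical stand-ins; compile separately to experiment.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define EX_L2_BITS  4
#define EX_L2_SIZE  (1 << EX_L2_BITS)
#define EX_LEVELS   3                      /* 3 levels of 4 bits each */
#define EX_NIL      ((uint16_t)~0)
#define EX_DEFAULT  0                      /* "unassigned" leaf value */

typedef union {
    uint16_t leaf;                         /* payload, only at level 0 */
    uint16_t node;                         /* pool index of the child table */
} ExEntry;

static ExEntry (*ex_pool)[EX_L2_SIZE];     /* pool of node tables */
static unsigned ex_pool_nb, ex_pool_alloc;
static ExEntry ex_root = { .node = EX_NIL };

static void ex_reserve(unsigned nodes)
{
    if (ex_pool_nb + nodes > ex_pool_alloc) {
        /* doubling (from at least 16 >= EX_LEVELS) always leaves room */
        ex_pool_alloc = ex_pool_alloc ? ex_pool_alloc * 2 : 16;
        ex_pool = realloc(ex_pool, ex_pool_alloc * sizeof(*ex_pool));
        assert(ex_pool);
    }
}

static uint16_t ex_node_alloc(void)
{
    uint16_t ret = ex_pool_nb++;
    unsigned i;

    assert(ret != ex_pool_alloc);          /* ex_reserve() ran beforehand */
    for (i = 0; i < EX_L2_SIZE; i++) {
        ex_pool[ret][i].node = EX_NIL;
    }
    return ret;
}

static void ex_set_level(ExEntry *lp, unsigned long index,
                         uint16_t leaf, int level)
{
    ExEntry *p;
    int i;

    if (lp->node == EX_NIL) {
        lp->node = ex_node_alloc();
        p = ex_pool[lp->node];
        if (level == 0) {
            for (i = 0; i < EX_L2_SIZE; i++) {
                p[i].leaf = EX_DEFAULT;    /* fresh bottom level */
            }
        }
    } else {
        p = ex_pool[lp->node];
    }
    lp = &p[(index >> (level * EX_L2_BITS)) & (EX_L2_SIZE - 1)];
    if (level == 0) {
        lp->leaf = leaf;                   /* bottom level reached */
    } else {
        ex_set_level(lp, index, leaf, level - 1);
    }
}

static void ex_set(unsigned long index, uint16_t leaf)
{
    ex_reserve(EX_LEVELS);                 /* any realloc happens here... */
    ex_set_level(&ex_root, index, leaf, EX_LEVELS - 1);  /* ...not mid-walk */
}

static uint16_t ex_find(unsigned long index)
{
    ExEntry lp = ex_root;
    int level;

    for (level = EX_LEVELS - 1; level >= 0; level--) {
        if (lp.node == EX_NIL) {
            return EX_DEFAULT;
        }
        lp = ex_pool[lp.node][(index >> (level * EX_L2_BITS)) & (EX_L2_SIZE - 1)];
    }
    return lp.leaf;
}

int main(void)
{
    ex_set(0x123, 7);
    printf("0x123 -> %u, 0x124 -> %u\n",
           (unsigned)ex_find(0x123), (unsigned)ex_find(0x124));
    return 0;
}
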
502 static void tlb_protect_code(ram_addr_t ram_addr);
503 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
504 target_ulong vaddr);
505 #define mmap_lock() do { } while(0)
506 #define mmap_unlock() do { } while(0)
507 #endif
509 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
511 #if defined(CONFIG_USER_ONLY)
512 /* Currently it is not recommended to allocate big chunks of data in
513 user mode. This will change once a dedicated libc is used. */
514 #define USE_STATIC_CODE_GEN_BUFFER
515 #endif
517 #ifdef USE_STATIC_CODE_GEN_BUFFER
518 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
519 __attribute__((aligned (CODE_GEN_ALIGN)));
520 #endif
522 static void code_gen_alloc(unsigned long tb_size)
524 #ifdef USE_STATIC_CODE_GEN_BUFFER
525 code_gen_buffer = static_code_gen_buffer;
526 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
527 map_exec(code_gen_buffer, code_gen_buffer_size);
528 #else
529 code_gen_buffer_size = tb_size;
530 if (code_gen_buffer_size == 0) {
531 #if defined(CONFIG_USER_ONLY)
532 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
533 #else
534 /* XXX: needs adjustments */
535 code_gen_buffer_size = (unsigned long)(ram_size / 4);
536 #endif
538 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
539 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
540 /* The code gen buffer location may have constraints depending on
541 the host cpu and OS */
542 #if defined(__linux__)
544 int flags;
545 void *start = NULL;
547 flags = MAP_PRIVATE | MAP_ANONYMOUS;
548 #if defined(__x86_64__)
549 flags |= MAP_32BIT;
550 /* Cannot map more than that */
551 if (code_gen_buffer_size > (800 * 1024 * 1024))
552 code_gen_buffer_size = (800 * 1024 * 1024);
553 #elif defined(__sparc_v9__)
554 // Map the buffer below 2G, so we can use direct calls and branches
555 flags |= MAP_FIXED;
556 start = (void *) 0x60000000UL;
557 if (code_gen_buffer_size > (512 * 1024 * 1024))
558 code_gen_buffer_size = (512 * 1024 * 1024);
559 #elif defined(__arm__)
560 /* Keep the buffer no bigger than 16MB to branch between blocks */
561 if (code_gen_buffer_size > 16 * 1024 * 1024)
562 code_gen_buffer_size = 16 * 1024 * 1024;
563 #elif defined(__s390x__)
564 /* Map the buffer so that we can use direct calls and branches. */
565 /* We have a +- 4GB range on the branches; leave some slop. */
566 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
567 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
569 start = (void *)0x90000000UL;
570 #endif
571 code_gen_buffer = mmap(start, code_gen_buffer_size,
572 PROT_WRITE | PROT_READ | PROT_EXEC,
573 flags, -1, 0);
574 if (code_gen_buffer == MAP_FAILED) {
575 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
576 exit(1);
579 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
580 || defined(__DragonFly__) || defined(__OpenBSD__) \
581 || defined(__NetBSD__)
583 int flags;
584 void *addr = NULL;
585 flags = MAP_PRIVATE | MAP_ANONYMOUS;
586 #if defined(__x86_64__)
587 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
588 * 0x40000000 is free */
589 flags |= MAP_FIXED;
590 addr = (void *)0x40000000;
591 /* Cannot map more than that */
592 if (code_gen_buffer_size > (800 * 1024 * 1024))
593 code_gen_buffer_size = (800 * 1024 * 1024);
594 #elif defined(__sparc_v9__)
595 // Map the buffer below 2G, so we can use direct calls and branches
596 flags |= MAP_FIXED;
597 addr = (void *) 0x60000000UL;
598 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
599 code_gen_buffer_size = (512 * 1024 * 1024);
601 #endif
602 code_gen_buffer = mmap(addr, code_gen_buffer_size,
603 PROT_WRITE | PROT_READ | PROT_EXEC,
604 flags, -1, 0);
605 if (code_gen_buffer == MAP_FAILED) {
606 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
607 exit(1);
610 #else
611 code_gen_buffer = g_malloc(code_gen_buffer_size);
612 map_exec(code_gen_buffer, code_gen_buffer_size);
613 #endif
614 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
615 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
616 code_gen_buffer_max_size = code_gen_buffer_size -
617 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
618 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
619 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
622 /* Must be called before using the QEMU cpus. 'tb_size' is the size
623 (in bytes) allocated to the translation buffer. Zero means default
624 size. */
625 void tcg_exec_init(unsigned long tb_size)
627 cpu_gen_init();
628 code_gen_alloc(tb_size);
629 code_gen_ptr = code_gen_buffer;
630 page_init();
631 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
632 /* There's no guest base to take into account, so go ahead and
633 initialize the prologue now. */
634 tcg_prologue_init(&tcg_ctx);
635 #endif
638 bool tcg_enabled(void)
640 return code_gen_buffer != NULL;
643 void cpu_exec_init_all(void)
645 #if !defined(CONFIG_USER_ONLY)
646 memory_map_init();
647 io_mem_init();
648 #endif
651 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
653 static int cpu_common_post_load(void *opaque, int version_id)
655 CPUState *env = opaque;
657 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
658 version_id is increased. */
659 env->interrupt_request &= ~0x01;
660 tlb_flush(env, 1);
662 return 0;
665 static const VMStateDescription vmstate_cpu_common = {
666 .name = "cpu_common",
667 .version_id = 1,
668 .minimum_version_id = 1,
669 .minimum_version_id_old = 1,
670 .post_load = cpu_common_post_load,
671 .fields = (VMStateField []) {
672 VMSTATE_UINT32(halted, CPUState),
673 VMSTATE_UINT32(interrupt_request, CPUState),
674 VMSTATE_END_OF_LIST()
677 #endif
679 CPUState *qemu_get_cpu(int cpu)
681 CPUState *env = first_cpu;
683 while (env) {
684 if (env->cpu_index == cpu)
685 break;
686 env = env->next_cpu;
689 return env;
692 void cpu_exec_init(CPUState *env)
694 CPUState **penv;
695 int cpu_index;
697 #if defined(CONFIG_USER_ONLY)
698 cpu_list_lock();
699 #endif
700 env->next_cpu = NULL;
701 penv = &first_cpu;
702 cpu_index = 0;
703 while (*penv != NULL) {
704 penv = &(*penv)->next_cpu;
705 cpu_index++;
707 env->cpu_index = cpu_index;
708 env->numa_node = 0;
709 QTAILQ_INIT(&env->breakpoints);
710 QTAILQ_INIT(&env->watchpoints);
711 #ifndef CONFIG_USER_ONLY
712 env->thread_id = qemu_get_thread_id();
713 #endif
714 *penv = env;
715 #if defined(CONFIG_USER_ONLY)
716 cpu_list_unlock();
717 #endif
718 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
719 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
720 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
721 cpu_save, cpu_load, env);
722 #endif
725 /* Allocate a new translation block. Flush the translation buffer if
726 too many translation blocks or too much generated code. */
727 static TranslationBlock *tb_alloc(target_ulong pc)
729 TranslationBlock *tb;
731 if (nb_tbs >= code_gen_max_blocks ||
732 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
733 return NULL;
734 tb = &tbs[nb_tbs++];
735 tb->pc = pc;
736 tb->cflags = 0;
737 return tb;
740 void tb_free(TranslationBlock *tb)
742 /* In practice this is mostly used for single-use temporary TBs.
743 Ignore the hard cases and just back up if this TB happens to
744 be the last one generated. */
745 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
746 code_gen_ptr = tb->tc_ptr;
747 nb_tbs--;
751 static inline void invalidate_page_bitmap(PageDesc *p)
753 if (p->code_bitmap) {
754 g_free(p->code_bitmap);
755 p->code_bitmap = NULL;
757 p->code_write_count = 0;
760 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
762 static void page_flush_tb_1 (int level, void **lp)
764 int i;
766 if (*lp == NULL) {
767 return;
769 if (level == 0) {
770 PageDesc *pd = *lp;
771 for (i = 0; i < L2_SIZE; ++i) {
772 pd[i].first_tb = NULL;
773 invalidate_page_bitmap(pd + i);
775 } else {
776 void **pp = *lp;
777 for (i = 0; i < L2_SIZE; ++i) {
778 page_flush_tb_1 (level - 1, pp + i);
783 static void page_flush_tb(void)
785 int i;
786 for (i = 0; i < V_L1_SIZE; i++) {
787 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
791 /* flush all the translation blocks */
792 /* XXX: tb_flush is currently not thread safe */
793 void tb_flush(CPUState *env1)
795 CPUState *env;
796 #if defined(DEBUG_FLUSH)
797 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
798 (unsigned long)(code_gen_ptr - code_gen_buffer),
799 nb_tbs, nb_tbs > 0 ?
800 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
801 #endif
802 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
803 cpu_abort(env1, "Internal error: code buffer overflow\n");
805 nb_tbs = 0;
807 for(env = first_cpu; env != NULL; env = env->next_cpu) {
808 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
811 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
812 page_flush_tb();
814 code_gen_ptr = code_gen_buffer;
815 /* XXX: flush processor icache at this point if cache flush is
816 expensive */
817 tb_flush_count++;
820 #ifdef DEBUG_TB_CHECK
822 static void tb_invalidate_check(target_ulong address)
824 TranslationBlock *tb;
825 int i;
826 address &= TARGET_PAGE_MASK;
827 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
828 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
829 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
830 address >= tb->pc + tb->size)) {
831 printf("ERROR invalidate: address=" TARGET_FMT_lx
832 " PC=%08lx size=%04x\n",
833 address, (long)tb->pc, tb->size);
839 /* verify that all the pages have correct rights for code */
840 static void tb_page_check(void)
842 TranslationBlock *tb;
843 int i, flags1, flags2;
845 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
846 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
847 flags1 = page_get_flags(tb->pc);
848 flags2 = page_get_flags(tb->pc + tb->size - 1);
849 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
850 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
851 (long)tb->pc, tb->size, flags1, flags2);
857 #endif
859 /* invalidate one TB */
860 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
861 int next_offset)
863 TranslationBlock *tb1;
864 for(;;) {
865 tb1 = *ptb;
866 if (tb1 == tb) {
867 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
868 break;
870 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
874 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
876 TranslationBlock *tb1;
877 unsigned int n1;
879 for(;;) {
880 tb1 = *ptb;
881 n1 = (long)tb1 & 3;
882 tb1 = (TranslationBlock *)((long)tb1 & ~3);
883 if (tb1 == tb) {
884 *ptb = tb1->page_next[n1];
885 break;
887 ptb = &tb1->page_next[n1];
891 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
893 TranslationBlock *tb1, **ptb;
894 unsigned int n1;
896 ptb = &tb->jmp_next[n];
897 tb1 = *ptb;
898 if (tb1) {
899 /* find tb(n) in circular list */
900 for(;;) {
901 tb1 = *ptb;
902 n1 = (long)tb1 & 3;
903 tb1 = (TranslationBlock *)((long)tb1 & ~3);
904 if (n1 == n && tb1 == tb)
905 break;
906 if (n1 == 2) {
907 ptb = &tb1->jmp_first;
908 } else {
909 ptb = &tb1->jmp_next[n1];
912 /* now we can suppress tb(n) from the list */
913 *ptb = tb->jmp_next[n];
915 tb->jmp_next[n] = NULL;
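
/*
 * Illustrative sketch (not QEMU code): the pointer-tagging trick used by
 * the list walks above ((long)tb & 3 / (long)tb & ~3).  Because the
 * structures are at least 4-byte aligned, the two low pointer bits are
 * free to carry a small tag (e.g. which of a TB's two pages a link
 * belongs to, or the end-of-list marker 2 in the jmp lists).  Names below
 * are hypothetical; compile separately.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct ExTB { int dummy; } ExTB;

static void *ex_tag(ExTB *tb, unsigned tag)
{
    assert(((uintptr_t)tb & 3) == 0 && tag < 4);  /* low bits must be free */
    return (void *)((uintptr_t)tb | tag);
}

static ExTB *ex_untag(void *p, unsigned *tag)
{
    *tag = (uintptr_t)p & 3;
    return (ExTB *)((uintptr_t)p & ~(uintptr_t)3);
}

int main(void)
{
    ExTB tb;
    unsigned tag;
    void *p = ex_tag(&tb, 1);
    ExTB *back = ex_untag(p, &tag);

    printf("tag=%u same=%d\n", tag, back == &tb);
    return 0;
}
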
919 /* reset the jump entry 'n' of a TB so that it is not chained to
920 another TB */
921 static inline void tb_reset_jump(TranslationBlock *tb, int n)
923 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
926 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
928 CPUState *env;
929 PageDesc *p;
930 unsigned int h, n1;
931 tb_page_addr_t phys_pc;
932 TranslationBlock *tb1, *tb2;
934 /* remove the TB from the hash list */
935 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
936 h = tb_phys_hash_func(phys_pc);
937 tb_remove(&tb_phys_hash[h], tb,
938 offsetof(TranslationBlock, phys_hash_next));
940 /* remove the TB from the page list */
941 if (tb->page_addr[0] != page_addr) {
942 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
943 tb_page_remove(&p->first_tb, tb);
944 invalidate_page_bitmap(p);
946 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
947 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
948 tb_page_remove(&p->first_tb, tb);
949 invalidate_page_bitmap(p);
952 tb_invalidated_flag = 1;
954 /* remove the TB from the hash list */
955 h = tb_jmp_cache_hash_func(tb->pc);
956 for(env = first_cpu; env != NULL; env = env->next_cpu) {
957 if (env->tb_jmp_cache[h] == tb)
958 env->tb_jmp_cache[h] = NULL;
961 /* suppress this TB from the two jump lists */
962 tb_jmp_remove(tb, 0);
963 tb_jmp_remove(tb, 1);
965 /* suppress any remaining jumps to this TB */
966 tb1 = tb->jmp_first;
967 for(;;) {
968 n1 = (long)tb1 & 3;
969 if (n1 == 2)
970 break;
971 tb1 = (TranslationBlock *)((long)tb1 & ~3);
972 tb2 = tb1->jmp_next[n1];
973 tb_reset_jump(tb1, n1);
974 tb1->jmp_next[n1] = NULL;
975 tb1 = tb2;
977 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
979 tb_phys_invalidate_count++;
982 static inline void set_bits(uint8_t *tab, int start, int len)
984 int end, mask, end1;
986 end = start + len;
987 tab += start >> 3;
988 mask = 0xff << (start & 7);
989 if ((start & ~7) == (end & ~7)) {
990 if (start < end) {
991 mask &= ~(0xff << (end & 7));
992 *tab |= mask;
994 } else {
995 *tab++ |= mask;
996 start = (start + 8) & ~7;
997 end1 = end & ~7;
998 while (start < end1) {
999 *tab++ = 0xff;
1000 start += 8;
1002 if (start < end) {
1003 mask = ~(0xff << (end & 7));
1004 *tab |= mask;
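
/*
 * Illustrative sketch (not QEMU code): the effect of set_bits() above,
 * written as a simple bit-by-bit loop.  The function above fills whole
 * bytes in the middle of the range for speed; semantically both mark
 * bits [start, start + len) of a byte-array bitmap.  Compile separately.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void ex_set_bits(uint8_t *tab, int start, int len)
{
    int i;

    for (i = start; i < start + len; i++) {
        tab[i >> 3] |= 1 << (i & 7);       /* byte i/8, bit i%8 */
    }
}

int main(void)
{
    uint8_t bitmap[8];

    memset(bitmap, 0, sizeof(bitmap));
    ex_set_bits(bitmap, 5, 9);             /* set bits 5..13 */
    printf("%02x %02x\n", (unsigned)bitmap[0], (unsigned)bitmap[1]);
    /* expect: e0 3f */
    return 0;
}
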
1009 static void build_page_bitmap(PageDesc *p)
1011 int n, tb_start, tb_end;
1012 TranslationBlock *tb;
1014 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1016 tb = p->first_tb;
1017 while (tb != NULL) {
1018 n = (long)tb & 3;
1019 tb = (TranslationBlock *)((long)tb & ~3);
1020 /* NOTE: this is subtle as a TB may span two physical pages */
1021 if (n == 0) {
1022 /* NOTE: tb_end may be after the end of the page, but
1023 it is not a problem */
1024 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1025 tb_end = tb_start + tb->size;
1026 if (tb_end > TARGET_PAGE_SIZE)
1027 tb_end = TARGET_PAGE_SIZE;
1028 } else {
1029 tb_start = 0;
1030 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1032 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1033 tb = tb->page_next[n];
1037 TranslationBlock *tb_gen_code(CPUState *env,
1038 target_ulong pc, target_ulong cs_base,
1039 int flags, int cflags)
1041 TranslationBlock *tb;
1042 uint8_t *tc_ptr;
1043 tb_page_addr_t phys_pc, phys_page2;
1044 target_ulong virt_page2;
1045 int code_gen_size;
1047 phys_pc = get_page_addr_code(env, pc);
1048 tb = tb_alloc(pc);
1049 if (!tb) {
1050 /* flush must be done */
1051 tb_flush(env);
1052 /* cannot fail at this point */
1053 tb = tb_alloc(pc);
1054 /* Don't forget to invalidate previous TB info. */
1055 tb_invalidated_flag = 1;
1057 tc_ptr = code_gen_ptr;
1058 tb->tc_ptr = tc_ptr;
1059 tb->cs_base = cs_base;
1060 tb->flags = flags;
1061 tb->cflags = cflags;
1062 cpu_gen_code(env, tb, &code_gen_size);
1063 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1065 /* check next page if needed */
1066 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1067 phys_page2 = -1;
1068 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1069 phys_page2 = get_page_addr_code(env, virt_page2);
1071 tb_link_page(tb, phys_pc, phys_page2);
1072 return tb;
1075 /* invalidate all TBs which intersect with the target physical page
1076 starting in the range [start, end). NOTE: start and end must refer to
1077 the same physical page. 'is_cpu_write_access' should be true if called
1078 from a real cpu write access: the virtual CPU will exit the current
1079 TB if code is modified inside this TB. */
1080 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1081 int is_cpu_write_access)
1083 TranslationBlock *tb, *tb_next, *saved_tb;
1084 CPUState *env = cpu_single_env;
1085 tb_page_addr_t tb_start, tb_end;
1086 PageDesc *p;
1087 int n;
1088 #ifdef TARGET_HAS_PRECISE_SMC
1089 int current_tb_not_found = is_cpu_write_access;
1090 TranslationBlock *current_tb = NULL;
1091 int current_tb_modified = 0;
1092 target_ulong current_pc = 0;
1093 target_ulong current_cs_base = 0;
1094 int current_flags = 0;
1095 #endif /* TARGET_HAS_PRECISE_SMC */
1097 p = page_find(start >> TARGET_PAGE_BITS);
1098 if (!p)
1099 return;
1100 if (!p->code_bitmap &&
1101 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1102 is_cpu_write_access) {
1103 /* build code bitmap */
1104 build_page_bitmap(p);
1107 /* we remove all the TBs in the range [start, end) */
1108 /* XXX: see if in some cases it could be faster to invalidate all the code */
1109 tb = p->first_tb;
1110 while (tb != NULL) {
1111 n = (long)tb & 3;
1112 tb = (TranslationBlock *)((long)tb & ~3);
1113 tb_next = tb->page_next[n];
1114 /* NOTE: this is subtle as a TB may span two physical pages */
1115 if (n == 0) {
1116 /* NOTE: tb_end may be after the end of the page, but
1117 it is not a problem */
1118 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1119 tb_end = tb_start + tb->size;
1120 } else {
1121 tb_start = tb->page_addr[1];
1122 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1124 if (!(tb_end <= start || tb_start >= end)) {
1125 #ifdef TARGET_HAS_PRECISE_SMC
1126 if (current_tb_not_found) {
1127 current_tb_not_found = 0;
1128 current_tb = NULL;
1129 if (env->mem_io_pc) {
1130 /* now we have a real cpu fault */
1131 current_tb = tb_find_pc(env->mem_io_pc);
1134 if (current_tb == tb &&
1135 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1136 /* If we are modifying the current TB, we must stop
1137 its execution. We could be more precise by checking
1138 that the modification is after the current PC, but it
1139 would require a specialized function to partially
1140 restore the CPU state */
1142 current_tb_modified = 1;
1143 cpu_restore_state(current_tb, env, env->mem_io_pc);
1144 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1145 &current_flags);
1147 #endif /* TARGET_HAS_PRECISE_SMC */
1148 /* we need to do that to handle the case where a signal
1149 occurs while doing tb_phys_invalidate() */
1150 saved_tb = NULL;
1151 if (env) {
1152 saved_tb = env->current_tb;
1153 env->current_tb = NULL;
1155 tb_phys_invalidate(tb, -1);
1156 if (env) {
1157 env->current_tb = saved_tb;
1158 if (env->interrupt_request && env->current_tb)
1159 cpu_interrupt(env, env->interrupt_request);
1162 tb = tb_next;
1164 #if !defined(CONFIG_USER_ONLY)
1165 /* if no code remaining, no need to continue to use slow writes */
1166 if (!p->first_tb) {
1167 invalidate_page_bitmap(p);
1168 if (is_cpu_write_access) {
1169 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1172 #endif
1173 #ifdef TARGET_HAS_PRECISE_SMC
1174 if (current_tb_modified) {
1175 /* we generate a block containing just the instruction
1176 modifying the memory. It will ensure that it cannot modify
1177 itself */
1178 env->current_tb = NULL;
1179 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1180 cpu_resume_from_signal(env, NULL);
1182 #endif
1185 /* len must be <= 8 and start must be a multiple of len */
1186 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1188 PageDesc *p;
1189 int offset, b;
1190 #if 0
1191 if (1) {
1192 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1193 cpu_single_env->mem_io_vaddr, len,
1194 cpu_single_env->eip,
1195 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1197 #endif
1198 p = page_find(start >> TARGET_PAGE_BITS);
1199 if (!p)
1200 return;
1201 if (p->code_bitmap) {
1202 offset = start & ~TARGET_PAGE_MASK;
1203 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1204 if (b & ((1 << len) - 1))
1205 goto do_invalidate;
1206 } else {
1207 do_invalidate:
1208 tb_invalidate_phys_page_range(start, start + len, 1);
1212 #if !defined(CONFIG_SOFTMMU)
1213 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1214 unsigned long pc, void *puc)
1216 TranslationBlock *tb;
1217 PageDesc *p;
1218 int n;
1219 #ifdef TARGET_HAS_PRECISE_SMC
1220 TranslationBlock *current_tb = NULL;
1221 CPUState *env = cpu_single_env;
1222 int current_tb_modified = 0;
1223 target_ulong current_pc = 0;
1224 target_ulong current_cs_base = 0;
1225 int current_flags = 0;
1226 #endif
1228 addr &= TARGET_PAGE_MASK;
1229 p = page_find(addr >> TARGET_PAGE_BITS);
1230 if (!p)
1231 return;
1232 tb = p->first_tb;
1233 #ifdef TARGET_HAS_PRECISE_SMC
1234 if (tb && pc != 0) {
1235 current_tb = tb_find_pc(pc);
1237 #endif
1238 while (tb != NULL) {
1239 n = (long)tb & 3;
1240 tb = (TranslationBlock *)((long)tb & ~3);
1241 #ifdef TARGET_HAS_PRECISE_SMC
1242 if (current_tb == tb &&
1243 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1244 /* If we are modifying the current TB, we must stop
1245 its execution. We could be more precise by checking
1246 that the modification is after the current PC, but it
1247 would require a specialized function to partially
1248 restore the CPU state */
1250 current_tb_modified = 1;
1251 cpu_restore_state(current_tb, env, pc);
1252 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1253 &current_flags);
1255 #endif /* TARGET_HAS_PRECISE_SMC */
1256 tb_phys_invalidate(tb, addr);
1257 tb = tb->page_next[n];
1259 p->first_tb = NULL;
1260 #ifdef TARGET_HAS_PRECISE_SMC
1261 if (current_tb_modified) {
1262 /* we generate a block containing just the instruction
1263 modifying the memory. It will ensure that it cannot modify
1264 itself */
1265 env->current_tb = NULL;
1266 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1267 cpu_resume_from_signal(env, puc);
1269 #endif
1271 #endif
1273 /* add the tb in the target page and protect it if necessary */
1274 static inline void tb_alloc_page(TranslationBlock *tb,
1275 unsigned int n, tb_page_addr_t page_addr)
1277 PageDesc *p;
1278 #ifndef CONFIG_USER_ONLY
1279 bool page_already_protected;
1280 #endif
1282 tb->page_addr[n] = page_addr;
1283 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1284 tb->page_next[n] = p->first_tb;
1285 #ifndef CONFIG_USER_ONLY
1286 page_already_protected = p->first_tb != NULL;
1287 #endif
1288 p->first_tb = (TranslationBlock *)((long)tb | n);
1289 invalidate_page_bitmap(p);
1291 #if defined(TARGET_HAS_SMC) || 1
1293 #if defined(CONFIG_USER_ONLY)
1294 if (p->flags & PAGE_WRITE) {
1295 target_ulong addr;
1296 PageDesc *p2;
1297 int prot;
1299 /* force the host page as non writable (writes will have a
1300 page fault + mprotect overhead) */
1301 page_addr &= qemu_host_page_mask;
1302 prot = 0;
1303 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1304 addr += TARGET_PAGE_SIZE) {
1306 p2 = page_find (addr >> TARGET_PAGE_BITS);
1307 if (!p2)
1308 continue;
1309 prot |= p2->flags;
1310 p2->flags &= ~PAGE_WRITE;
1312 mprotect(g2h(page_addr), qemu_host_page_size,
1313 (prot & PAGE_BITS) & ~PAGE_WRITE);
1314 #ifdef DEBUG_TB_INVALIDATE
1315 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1316 page_addr);
1317 #endif
1319 #else
1320 /* if some code is already present, then the pages are already
1321 protected. So we handle the case where only the first TB is
1322 allocated in a physical page */
1323 if (!page_already_protected) {
1324 tlb_protect_code(page_addr);
1326 #endif
1328 #endif /* TARGET_HAS_SMC */
1331 /* add a new TB and link it to the physical page tables. phys_page2 is
1332 (-1) to indicate that only one page contains the TB. */
1333 void tb_link_page(TranslationBlock *tb,
1334 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1336 unsigned int h;
1337 TranslationBlock **ptb;
1339 /* Grab the mmap lock to stop another thread invalidating this TB
1340 before we are done. */
1341 mmap_lock();
1342 /* add in the physical hash table */
1343 h = tb_phys_hash_func(phys_pc);
1344 ptb = &tb_phys_hash[h];
1345 tb->phys_hash_next = *ptb;
1346 *ptb = tb;
1348 /* add in the page list */
1349 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1350 if (phys_page2 != -1)
1351 tb_alloc_page(tb, 1, phys_page2);
1352 else
1353 tb->page_addr[1] = -1;
1355 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1356 tb->jmp_next[0] = NULL;
1357 tb->jmp_next[1] = NULL;
1359 /* init original jump addresses */
1360 if (tb->tb_next_offset[0] != 0xffff)
1361 tb_reset_jump(tb, 0);
1362 if (tb->tb_next_offset[1] != 0xffff)
1363 tb_reset_jump(tb, 1);
1365 #ifdef DEBUG_TB_CHECK
1366 tb_page_check();
1367 #endif
1368 mmap_unlock();
1371 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1372 tb[1].tc_ptr. Return NULL if not found */
1373 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1375 int m_min, m_max, m;
1376 unsigned long v;
1377 TranslationBlock *tb;
1379 if (nb_tbs <= 0)
1380 return NULL;
1381 if (tc_ptr < (unsigned long)code_gen_buffer ||
1382 tc_ptr >= (unsigned long)code_gen_ptr)
1383 return NULL;
1384 /* binary search (cf Knuth) */
1385 m_min = 0;
1386 m_max = nb_tbs - 1;
1387 while (m_min <= m_max) {
1388 m = (m_min + m_max) >> 1;
1389 tb = &tbs[m];
1390 v = (unsigned long)tb->tc_ptr;
1391 if (v == tc_ptr)
1392 return tb;
1393 else if (tc_ptr < v) {
1394 m_max = m - 1;
1395 } else {
1396 m_min = m + 1;
1399 return &tbs[m_max];
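
/*
 * Illustrative sketch (not QEMU code): the binary-search shape used by
 * tb_find_pc() above - find the last entry whose start address is <= the
 * key, so the key falls inside that entry's range.  The data here is
 * hypothetical; compile separately.
 */
#include <stdio.h>

static int ex_find_le(const unsigned long *starts, int n, unsigned long key)
{
    int lo = 0, hi = n - 1;

    while (lo <= hi) {
        int mid = (lo + hi) >> 1;

        if (starts[mid] == key) {
            return mid;
        } else if (key < starts[mid]) {
            hi = mid - 1;
        } else {
            lo = mid + 1;
        }
    }
    return hi;      /* greatest index with starts[hi] <= key, or -1 */
}

int main(void)
{
    unsigned long starts[] = { 0x100, 0x180, 0x240, 0x400 };

    printf("%d %d %d\n",
           ex_find_le(starts, 4, 0x200),   /* 1: inside [0x180, 0x240) */
           ex_find_le(starts, 4, 0x400),   /* 3: exact hit */
           ex_find_le(starts, 4, 0x50));   /* -1: below the first entry */
    return 0;
}
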
1402 static void tb_reset_jump_recursive(TranslationBlock *tb);
1404 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1406 TranslationBlock *tb1, *tb_next, **ptb;
1407 unsigned int n1;
1409 tb1 = tb->jmp_next[n];
1410 if (tb1 != NULL) {
1411 /* find head of list */
1412 for(;;) {
1413 n1 = (long)tb1 & 3;
1414 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1415 if (n1 == 2)
1416 break;
1417 tb1 = tb1->jmp_next[n1];
1419 /* we are now sure that tb jumps to tb1 */
1420 tb_next = tb1;
1422 /* remove tb from the jmp_first list */
1423 ptb = &tb_next->jmp_first;
1424 for(;;) {
1425 tb1 = *ptb;
1426 n1 = (long)tb1 & 3;
1427 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1428 if (n1 == n && tb1 == tb)
1429 break;
1430 ptb = &tb1->jmp_next[n1];
1432 *ptb = tb->jmp_next[n];
1433 tb->jmp_next[n] = NULL;
1435 /* suppress the jump to next tb in generated code */
1436 tb_reset_jump(tb, n);
1438 /* suppress jumps in the tb on which we could have jumped */
1439 tb_reset_jump_recursive(tb_next);
1443 static void tb_reset_jump_recursive(TranslationBlock *tb)
1445 tb_reset_jump_recursive2(tb, 0);
1446 tb_reset_jump_recursive2(tb, 1);
1449 #if defined(TARGET_HAS_ICE)
1450 #if defined(CONFIG_USER_ONLY)
1451 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1453 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1455 #else
1456 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1458 target_phys_addr_t addr;
1459 ram_addr_t ram_addr;
1460 MemoryRegionSection section;
1462 addr = cpu_get_phys_page_debug(env, pc);
1463 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1464 if (!(memory_region_is_ram(section.mr)
1465 || (section.mr->rom_device && section.mr->readable))) {
1466 return;
1468 ram_addr = (memory_region_get_ram_addr(section.mr)
1469 + section.offset_within_region) & TARGET_PAGE_MASK;
1470 ram_addr |= (pc & ~TARGET_PAGE_MASK);
1471 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1473 #endif
1474 #endif /* TARGET_HAS_ICE */
1476 #if defined(CONFIG_USER_ONLY)
1477 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1482 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1483 int flags, CPUWatchpoint **watchpoint)
1485 return -ENOSYS;
1487 #else
1488 /* Add a watchpoint. */
1489 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1490 int flags, CPUWatchpoint **watchpoint)
1492 target_ulong len_mask = ~(len - 1);
1493 CPUWatchpoint *wp;
1495 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1496 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1497 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1498 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1499 return -EINVAL;
1501 wp = g_malloc(sizeof(*wp));
1503 wp->vaddr = addr;
1504 wp->len_mask = len_mask;
1505 wp->flags = flags;
1507 /* keep all GDB-injected watchpoints in front */
1508 if (flags & BP_GDB)
1509 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1510 else
1511 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1513 tlb_flush_page(env, addr);
1515 if (watchpoint)
1516 *watchpoint = wp;
1517 return 0;
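
/*
 * Illustrative sketch (not QEMU code): the length/alignment check used by
 * cpu_watchpoint_insert() above.  For a power-of-two len, ~(len - 1)
 * clears the offset bits, so (addr & ~len_mask) != 0 means the watchpoint
 * would be misaligned.  Compile separately.
 */
#include <stdio.h>

static int ex_watchpoint_ok(unsigned long addr, unsigned long len)
{
    unsigned long len_mask = ~(len - 1);

    if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
        return 0;                          /* bad length or unaligned */
    }
    return 1;
}

int main(void)
{
    printf("%d %d %d\n",
           ex_watchpoint_ok(0x1000, 4),    /* 1: aligned 4-byte watchpoint */
           ex_watchpoint_ok(0x1002, 4),    /* 0: 4 bytes at offset 2 */
           ex_watchpoint_ok(0x1000, 3));   /* 0: not a power of two */
    return 0;
}
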
1520 /* Remove a specific watchpoint. */
1521 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1522 int flags)
1524 target_ulong len_mask = ~(len - 1);
1525 CPUWatchpoint *wp;
1527 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1528 if (addr == wp->vaddr && len_mask == wp->len_mask
1529 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1530 cpu_watchpoint_remove_by_ref(env, wp);
1531 return 0;
1534 return -ENOENT;
1537 /* Remove a specific watchpoint by reference. */
1538 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1540 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1542 tlb_flush_page(env, watchpoint->vaddr);
1544 g_free(watchpoint);
1547 /* Remove all matching watchpoints. */
1548 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1550 CPUWatchpoint *wp, *next;
1552 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1553 if (wp->flags & mask)
1554 cpu_watchpoint_remove_by_ref(env, wp);
1557 #endif
1559 /* Add a breakpoint. */
1560 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1561 CPUBreakpoint **breakpoint)
1563 #if defined(TARGET_HAS_ICE)
1564 CPUBreakpoint *bp;
1566 bp = g_malloc(sizeof(*bp));
1568 bp->pc = pc;
1569 bp->flags = flags;
1571 /* keep all GDB-injected breakpoints in front */
1572 if (flags & BP_GDB)
1573 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1574 else
1575 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1577 breakpoint_invalidate(env, pc);
1579 if (breakpoint)
1580 *breakpoint = bp;
1581 return 0;
1582 #else
1583 return -ENOSYS;
1584 #endif
1587 /* Remove a specific breakpoint. */
1588 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1590 #if defined(TARGET_HAS_ICE)
1591 CPUBreakpoint *bp;
1593 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1594 if (bp->pc == pc && bp->flags == flags) {
1595 cpu_breakpoint_remove_by_ref(env, bp);
1596 return 0;
1599 return -ENOENT;
1600 #else
1601 return -ENOSYS;
1602 #endif
1605 /* Remove a specific breakpoint by reference. */
1606 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1608 #if defined(TARGET_HAS_ICE)
1609 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1611 breakpoint_invalidate(env, breakpoint->pc);
1613 g_free(breakpoint);
1614 #endif
1617 /* Remove all matching breakpoints. */
1618 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1620 #if defined(TARGET_HAS_ICE)
1621 CPUBreakpoint *bp, *next;
1623 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1624 if (bp->flags & mask)
1625 cpu_breakpoint_remove_by_ref(env, bp);
1627 #endif
1630 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1631 CPU loop after each instruction */
1632 void cpu_single_step(CPUState *env, int enabled)
1634 #if defined(TARGET_HAS_ICE)
1635 if (env->singlestep_enabled != enabled) {
1636 env->singlestep_enabled = enabled;
1637 if (kvm_enabled())
1638 kvm_update_guest_debug(env, 0);
1639 else {
1640 /* must flush all the translated code to avoid inconsistencies */
1641 /* XXX: only flush what is necessary */
1642 tb_flush(env);
1645 #endif
1648 /* enable or disable low-level logging */
1649 void cpu_set_log(int log_flags)
1651 loglevel = log_flags;
1652 if (loglevel && !logfile) {
1653 logfile = fopen(logfilename, log_append ? "a" : "w");
1654 if (!logfile) {
1655 perror(logfilename);
1656 _exit(1);
1658 #if !defined(CONFIG_SOFTMMU)
1659 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1661 static char logfile_buf[4096];
1662 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1664 #elif defined(_WIN32)
1665 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1666 setvbuf(logfile, NULL, _IONBF, 0);
1667 #else
1668 setvbuf(logfile, NULL, _IOLBF, 0);
1669 #endif
1670 log_append = 1;
1672 if (!loglevel && logfile) {
1673 fclose(logfile);
1674 logfile = NULL;
1678 void cpu_set_log_filename(const char *filename)
1680 logfilename = strdup(filename);
1681 if (logfile) {
1682 fclose(logfile);
1683 logfile = NULL;
1685 cpu_set_log(loglevel);
1688 static void cpu_unlink_tb(CPUState *env)
1690 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1691 problem and hope the cpu will stop of its own accord. For userspace
1692 emulation this often isn't actually as bad as it sounds. Often
1693 signals are used primarily to interrupt blocking syscalls. */
1694 TranslationBlock *tb;
1695 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1697 spin_lock(&interrupt_lock);
1698 tb = env->current_tb;
1699 /* if the cpu is currently executing code, we must unlink it and
1700 all the potentially executing TB */
1701 if (tb) {
1702 env->current_tb = NULL;
1703 tb_reset_jump_recursive(tb);
1705 spin_unlock(&interrupt_lock);
1708 #ifndef CONFIG_USER_ONLY
1709 /* mask must never be zero, except for A20 change call */
1710 static void tcg_handle_interrupt(CPUState *env, int mask)
1712 int old_mask;
1714 old_mask = env->interrupt_request;
1715 env->interrupt_request |= mask;
1718 * If called from iothread context, wake the target cpu in
1719 * case it is halted.
1721 if (!qemu_cpu_is_self(env)) {
1722 qemu_cpu_kick(env);
1723 return;
1726 if (use_icount) {
1727 env->icount_decr.u16.high = 0xffff;
1728 if (!can_do_io(env)
1729 && (mask & ~old_mask) != 0) {
1730 cpu_abort(env, "Raised interrupt while not in I/O function");
1732 } else {
1733 cpu_unlink_tb(env);
1737 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1739 #else /* CONFIG_USER_ONLY */
1741 void cpu_interrupt(CPUState *env, int mask)
1743 env->interrupt_request |= mask;
1744 cpu_unlink_tb(env);
1746 #endif /* CONFIG_USER_ONLY */
1748 void cpu_reset_interrupt(CPUState *env, int mask)
1750 env->interrupt_request &= ~mask;
1753 void cpu_exit(CPUState *env)
1755 env->exit_request = 1;
1756 cpu_unlink_tb(env);
1759 const CPULogItem cpu_log_items[] = {
1760 { CPU_LOG_TB_OUT_ASM, "out_asm",
1761 "show generated host assembly code for each compiled TB" },
1762 { CPU_LOG_TB_IN_ASM, "in_asm",
1763 "show target assembly code for each compiled TB" },
1764 { CPU_LOG_TB_OP, "op",
1765 "show micro ops for each compiled TB" },
1766 { CPU_LOG_TB_OP_OPT, "op_opt",
1767 "show micro ops "
1768 #ifdef TARGET_I386
1769 "before eflags optimization and "
1770 #endif
1771 "after liveness analysis" },
1772 { CPU_LOG_INT, "int",
1773 "show interrupts/exceptions in short format" },
1774 { CPU_LOG_EXEC, "exec",
1775 "show trace before each executed TB (lots of logs)" },
1776 { CPU_LOG_TB_CPU, "cpu",
1777 "show CPU state before block translation" },
1778 #ifdef TARGET_I386
1779 { CPU_LOG_PCALL, "pcall",
1780 "show protected mode far calls/returns/exceptions" },
1781 { CPU_LOG_RESET, "cpu_reset",
1782 "show CPU state before CPU resets" },
1783 #endif
1784 #ifdef DEBUG_IOPORT
1785 { CPU_LOG_IOPORT, "ioport",
1786 "show all i/o ports accesses" },
1787 #endif
1788 { 0, NULL, NULL },
1791 static int cmp1(const char *s1, int n, const char *s2)
1793 if (strlen(s2) != n)
1794 return 0;
1795 return memcmp(s1, s2, n) == 0;
1798 /* takes a comma-separated list of log masks. Returns 0 on error. */
1799 int cpu_str_to_log_mask(const char *str)
1801 const CPULogItem *item;
1802 int mask;
1803 const char *p, *p1;
1805 p = str;
1806 mask = 0;
1807 for(;;) {
1808 p1 = strchr(p, ',');
1809 if (!p1)
1810 p1 = p + strlen(p);
1811 if(cmp1(p,p1-p,"all")) {
1812 for(item = cpu_log_items; item->mask != 0; item++) {
1813 mask |= item->mask;
1815 } else {
1816 for(item = cpu_log_items; item->mask != 0; item++) {
1817 if (cmp1(p, p1 - p, item->name))
1818 goto found;
1820 return 0;
1822 found:
1823 mask |= item->mask;
1824 if (*p1 != ',')
1825 break;
1826 p = p1 + 1;
1828 return mask;
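
/*
 * Illustrative sketch (not QEMU code): parsing a comma-separated list of
 * flag names into a bit mask, the same way cpu_str_to_log_mask() above
 * does it (the "all" shortcut is omitted here).  The table and names are
 * hypothetical; compile separately.
 */
#include <stdio.h>
#include <string.h>

typedef struct { int mask; const char *name; } ExLogItem;

static const ExLogItem ex_items[] = {
    { 1 << 0, "exec" },
    { 1 << 1, "int" },
    { 1 << 2, "cpu" },
    { 0, NULL },
};

static int ex_str_to_mask(const char *str)
{
    const ExLogItem *item;
    const char *p = str, *p1;
    int mask = 0;

    for (;;) {
        p1 = strchr(p, ',');
        if (!p1) {
            p1 = p + strlen(p);
        }
        for (item = ex_items; item->name; item++) {
            if (strlen(item->name) == (size_t)(p1 - p) &&
                memcmp(p, item->name, p1 - p) == 0) {
                mask |= item->mask;
                break;
            }
        }
        if (!item->name) {
            return 0;                      /* unknown flag name */
        }
        if (*p1 != ',') {
            break;
        }
        p = p1 + 1;
    }
    return mask;
}

int main(void)
{
    printf("%d %d\n", ex_str_to_mask("exec,cpu"), ex_str_to_mask("bogus"));
    return 0;
}
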
1831 void cpu_abort(CPUState *env, const char *fmt, ...)
1833 va_list ap;
1834 va_list ap2;
1836 va_start(ap, fmt);
1837 va_copy(ap2, ap);
1838 fprintf(stderr, "qemu: fatal: ");
1839 vfprintf(stderr, fmt, ap);
1840 fprintf(stderr, "\n");
1841 #ifdef TARGET_I386
1842 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1843 #else
1844 cpu_dump_state(env, stderr, fprintf, 0);
1845 #endif
1846 if (qemu_log_enabled()) {
1847 qemu_log("qemu: fatal: ");
1848 qemu_log_vprintf(fmt, ap2);
1849 qemu_log("\n");
1850 #ifdef TARGET_I386
1851 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1852 #else
1853 log_cpu_state(env, 0);
1854 #endif
1855 qemu_log_flush();
1856 qemu_log_close();
1858 va_end(ap2);
1859 va_end(ap);
1860 #if defined(CONFIG_USER_ONLY)
1862 struct sigaction act;
1863 sigfillset(&act.sa_mask);
1864 act.sa_handler = SIG_DFL;
1865 sigaction(SIGABRT, &act, NULL);
1867 #endif
1868 abort();
1871 CPUState *cpu_copy(CPUState *env)
1873 CPUState *new_env = cpu_init(env->cpu_model_str);
1874 CPUState *next_cpu = new_env->next_cpu;
1875 int cpu_index = new_env->cpu_index;
1876 #if defined(TARGET_HAS_ICE)
1877 CPUBreakpoint *bp;
1878 CPUWatchpoint *wp;
1879 #endif
1881 memcpy(new_env, env, sizeof(CPUState));
1883 /* Preserve chaining and index. */
1884 new_env->next_cpu = next_cpu;
1885 new_env->cpu_index = cpu_index;
1887 /* Clone all break/watchpoints.
1888 Note: Once we support ptrace with hw-debug register access, make sure
1889 BP_CPU break/watchpoints are handled correctly on clone. */
1890 QTAILQ_INIT(&env->breakpoints);
1891 QTAILQ_INIT(&env->watchpoints);
1892 #if defined(TARGET_HAS_ICE)
1893 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1894 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1896 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1897 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1898 wp->flags, NULL);
1900 #endif
1902 return new_env;
1905 #if !defined(CONFIG_USER_ONLY)
1907 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1909 unsigned int i;
1911 /* Discard jump cache entries for any tb which might potentially
1912 overlap the flushed page. */
1913 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1914 memset (&env->tb_jmp_cache[i], 0,
1915 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1917 i = tb_jmp_cache_hash_page(addr);
1918 memset (&env->tb_jmp_cache[i], 0,
1919 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1922 static CPUTLBEntry s_cputlb_empty_entry = {
1923 .addr_read = -1,
1924 .addr_write = -1,
1925 .addr_code = -1,
1926 .addend = -1,
1929 /* NOTE:
1930 * If flush_global is true (the usual case), flush all tlb entries.
1931 * If flush_global is false, flush (at least) all tlb entries not
1932 * marked global.
1934 * Since QEMU doesn't currently implement a global/not-global flag
1935 * for tlb entries, at the moment tlb_flush() will also flush all
1936 * tlb entries in the flush_global == false case. This is OK because
1937 * CPU architectures generally permit an implementation to drop
1938 * entries from the TLB at any time, so flushing more entries than
1939 * required is only an efficiency issue, not a correctness issue.
1941 void tlb_flush(CPUState *env, int flush_global)
1943 int i;
1945 #if defined(DEBUG_TLB)
1946 printf("tlb_flush:\n");
1947 #endif
1948 /* must reset current TB so that interrupts cannot modify the
1949 links while we are modifying them */
1950 env->current_tb = NULL;
1952 for(i = 0; i < CPU_TLB_SIZE; i++) {
1953 int mmu_idx;
1954 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1955 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1959 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1961 env->tlb_flush_addr = -1;
1962 env->tlb_flush_mask = 0;
1963 tlb_flush_count++;
1966 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1968 if (addr == (tlb_entry->addr_read &
1969 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1970 addr == (tlb_entry->addr_write &
1971 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1972 addr == (tlb_entry->addr_code &
1973 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1974 *tlb_entry = s_cputlb_empty_entry;
1978 void tlb_flush_page(CPUState *env, target_ulong addr)
1980 int i;
1981 int mmu_idx;
1983 #if defined(DEBUG_TLB)
1984 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1985 #endif
1986 /* Check if we need to flush due to large pages. */
1987 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1988 #if defined(DEBUG_TLB)
1989 printf("tlb_flush_page: forced full flush ("
1990 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
1991 env->tlb_flush_addr, env->tlb_flush_mask);
1992 #endif
1993 tlb_flush(env, 1);
1994 return;
1996 /* must reset current TB so that interrupts cannot modify the
1997 links while we are modifying them */
1998 env->current_tb = NULL;
2000 addr &= TARGET_PAGE_MASK;
2001 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2002 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2003 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2005 tlb_flush_jmp_cache(env, addr);
2008 /* update the TLBs so that writes to code in the virtual page 'addr'
2009 can be detected */
2010 static void tlb_protect_code(ram_addr_t ram_addr)
2012 cpu_physical_memory_reset_dirty(ram_addr,
2013 ram_addr + TARGET_PAGE_SIZE,
2014 CODE_DIRTY_FLAG);
2017 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2018 tested for self modifying code */
2019 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2020 target_ulong vaddr)
2022 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2025 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2026 unsigned long start, unsigned long length)
2028 unsigned long addr;
2029 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2030 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2031 if ((addr - start) < length) {
2032 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2037 /* Note: start and end must be within the same ram block. */
2038 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2039 int dirty_flags)
2041 CPUState *env;
2042 unsigned long length, start1;
2043 int i;
2045 start &= TARGET_PAGE_MASK;
2046 end = TARGET_PAGE_ALIGN(end);
2048 length = end - start;
2049 if (length == 0)
2050 return;
2051 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2053 /* we modify the TLB cache so that the dirty bit will be set again
2054 when accessing the range */
2055 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2056 /* Check that we don't span multiple blocks - this breaks the
2057 address comparisons below. */
2058 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2059 != (end - 1) - start) {
2060 abort();
2063 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2064 int mmu_idx;
2065 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2066 for(i = 0; i < CPU_TLB_SIZE; i++)
2067 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2068 start1, length);
2073 int cpu_physical_memory_set_dirty_tracking(int enable)
2075 int ret = 0;
2076 in_migration = enable;
2077 return ret;
2080 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2082 ram_addr_t ram_addr;
2083 void *p;
2085 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2086 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2087 + tlb_entry->addend);
2088 ram_addr = qemu_ram_addr_from_host_nofail(p);
2089 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2090 tlb_entry->addr_write |= TLB_NOTDIRTY;
2095 /* update the TLB according to the current state of the dirty bits */
2096 void cpu_tlb_update_dirty(CPUState *env)
2098 int i;
2099 int mmu_idx;
2100 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2101 for(i = 0; i < CPU_TLB_SIZE; i++)
2102 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2106 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2108 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2109 tlb_entry->addr_write = vaddr;
2112 /* update the TLB corresponding to virtual page vaddr
2113 so that it is no longer dirty */
2114 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2116 int i;
2117 int mmu_idx;
2119 vaddr &= TARGET_PAGE_MASK;
2120 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2121 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2122 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2125 /* Our TLB does not support large pages, so remember the area covered by
2126 large pages and trigger a full TLB flush if these are invalidated. */
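/* For example, with an existing 2 MB entry at 0x40000000 and a new one added
   at 0x40200000, the loop below widens the mask until both addresses share
   one aligned region (4 MB at 0x40000000); any tlb_flush_page() inside that
   region then falls back to a full flush. */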
2127 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2128 target_ulong size)
2130 target_ulong mask = ~(size - 1);
2132 if (env->tlb_flush_addr == (target_ulong)-1) {
2133 env->tlb_flush_addr = vaddr & mask;
2134 env->tlb_flush_mask = mask;
2135 return;
2137 /* Extend the existing region to include the new page.
2138 This is a compromise between unnecessary flushes and the cost
2139 of maintaining a full variable size TLB. */
2140 mask &= env->tlb_flush_mask;
2141 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2142 mask <<= 1;
2144 env->tlb_flush_addr &= mask;
2145 env->tlb_flush_mask = mask;
2148 static bool is_ram_rom(MemoryRegionSection *s)
2150 return memory_region_is_ram(s->mr);
2153 static bool is_romd(MemoryRegionSection *s)
2155 MemoryRegion *mr = s->mr;
2157 return mr->rom_device && mr->readable;
2160 static bool is_ram_rom_romd(MemoryRegionSection *s)
2162 return is_ram_rom(s) || is_romd(s);
2165 /* Add a new TLB entry. At most one entry for a given virtual address
2166 is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2167 supplied size is only used by tlb_flush_page. */
2168 void tlb_set_page(CPUState *env, target_ulong vaddr,
2169 target_phys_addr_t paddr, int prot,
2170 int mmu_idx, target_ulong size)
2172 MemoryRegionSection section;
2173 unsigned int index;
2174 target_ulong address;
2175 target_ulong code_address;
2176 unsigned long addend;
2177 CPUTLBEntry *te;
2178 CPUWatchpoint *wp;
2179 target_phys_addr_t iotlb;
2181 assert(size >= TARGET_PAGE_SIZE);
2182 if (size != TARGET_PAGE_SIZE) {
2183 tlb_add_large_page(env, vaddr, size);
2185 section = phys_page_find(paddr >> TARGET_PAGE_BITS);
2186 #if defined(DEBUG_TLB)
2187 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2188 " prot=%x idx=%d pd=0x%08lx\n",
2189 vaddr, paddr, prot, mmu_idx, pd);
2190 #endif
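    /* The rest of this function derives three values for the new entry:
       'address' is the virtual address, tagged with TLB_MMIO when accesses
       must take the slow I/O path; 'addend' is the host pointer for pages
       backed by RAM/ROM (the TLB stores addend - vaddr so the fast path can
       add it to a guest address); 'iotlb' is what env->iotlb[] will hold,
       either a ram_addr routed through io_mem_notdirty/io_mem_rom, or an I/O
       region index plus offset for MMIO. */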
2192 address = vaddr;
2193 if (!is_ram_rom_romd(&section)) {
2194 /* IO memory case (romd handled later) */
2195 address |= TLB_MMIO;
2197 if (is_ram_rom_romd(&section)) {
2198 addend = (unsigned long)(memory_region_get_ram_ptr(section.mr)
2199 + section.offset_within_region);
2200 } else {
2201 addend = 0;
2203 if (is_ram_rom(&section)) {
2204 /* Normal RAM. */
2205 iotlb = (memory_region_get_ram_addr(section.mr)
2206 + section.offset_within_region) & TARGET_PAGE_MASK;
2207 if (!section.readonly)
2208 iotlb |= io_mem_notdirty.ram_addr;
2209 else
2210 iotlb |= io_mem_rom.ram_addr;
2211 } else {
2212 /* IO handlers are currently passed a physical address.
2213 It would be nice to pass an offset from the base address
2214 of that region. This would avoid having to special case RAM,
2215 and avoid full address decoding in every device.
2216 We can't use the high bits of pd for this because
2217 IO_MEM_ROMD uses these as a ram address. */
2218 iotlb = memory_region_get_ram_addr(section.mr) & ~TARGET_PAGE_MASK;
2219 iotlb += section.offset_within_region;
2222 code_address = address;
2223 /* Make accesses to pages with watchpoints go via the
2224 watchpoint trap routines. */
2225 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2226 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2227 /* Avoid trapping reads of pages with a write breakpoint. */
2228 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2229 iotlb = io_mem_watch.ram_addr + paddr;
2230 address |= TLB_MMIO;
2231 break;
2236 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2237 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2238 te = &env->tlb_table[mmu_idx][index];
2239 te->addend = addend - vaddr;
2240 if (prot & PAGE_READ) {
2241 te->addr_read = address;
2242 } else {
2243 te->addr_read = -1;
2246 if (prot & PAGE_EXEC) {
2247 te->addr_code = code_address;
2248 } else {
2249 te->addr_code = -1;
2251 if (prot & PAGE_WRITE) {
2252 if ((memory_region_is_ram(section.mr) && section.readonly)
2253 || is_romd(&section)) {
2254 /* Write access calls the I/O callback. */
2255 te->addr_write = address | TLB_MMIO;
2256 } else if (memory_region_is_ram(section.mr)
2257 && !cpu_physical_memory_is_dirty(
2258 section.mr->ram_addr
2259 + section.offset_within_region)) {
2260 te->addr_write = address | TLB_NOTDIRTY;
2261 } else {
2262 te->addr_write = address;
2264 } else {
2265 te->addr_write = -1;
2269 #else
2271 void tlb_flush(CPUState *env, int flush_global)
2275 void tlb_flush_page(CPUState *env, target_ulong addr)
2280 * Walks guest process memory "regions" one by one
2281 * and calls callback function 'fn' for each region.
2284 struct walk_memory_regions_data
2286 walk_memory_regions_fn fn;
2287 void *priv;
2288 unsigned long start;
2289 int prot;
2292 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2293 abi_ulong end, int new_prot)
2295 if (data->start != -1ul) {
2296 int rc = data->fn(data->priv, data->start, end, data->prot);
2297 if (rc != 0) {
2298 return rc;
2302 data->start = (new_prot ? end : -1ul);
2303 data->prot = new_prot;
2305 return 0;
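/* Recursively walk one level of the usermode page table: at leaf level the
   PageDesc flags are compared against the current protection and a region
   boundary is emitted whenever they change; intermediate levels simply
   recurse into their L2_SIZE children, accumulating the address in 'base'. */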
2308 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2309 abi_ulong base, int level, void **lp)
2311 abi_ulong pa;
2312 int i, rc;
2314 if (*lp == NULL) {
2315 return walk_memory_regions_end(data, base, 0);
2318 if (level == 0) {
2319 PageDesc *pd = *lp;
2320 for (i = 0; i < L2_SIZE; ++i) {
2321 int prot = pd[i].flags;
2323 pa = base | (i << TARGET_PAGE_BITS);
2324 if (prot != data->prot) {
2325 rc = walk_memory_regions_end(data, pa, prot);
2326 if (rc != 0) {
2327 return rc;
2331 } else {
2332 void **pp = *lp;
2333 for (i = 0; i < L2_SIZE; ++i) {
2334 pa = base | ((abi_ulong)i <<
2335 (TARGET_PAGE_BITS + L2_BITS * level));
2336 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2337 if (rc != 0) {
2338 return rc;
2343 return 0;
2346 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2348 struct walk_memory_regions_data data;
2349 unsigned long i;
2351 data.fn = fn;
2352 data.priv = priv;
2353 data.start = -1ul;
2354 data.prot = 0;
2356 for (i = 0; i < V_L1_SIZE; i++) {
2357 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2358 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2359 if (rc != 0) {
2360 return rc;
2364 return walk_memory_regions_end(&data, 0, 0);
2367 static int dump_region(void *priv, abi_ulong start,
2368 abi_ulong end, unsigned long prot)
2370 FILE *f = (FILE *)priv;
2372 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2373 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2374 start, end, end - start,
2375 ((prot & PAGE_READ) ? 'r' : '-'),
2376 ((prot & PAGE_WRITE) ? 'w' : '-'),
2377 ((prot & PAGE_EXEC) ? 'x' : '-'));
2379 return (0);
2382 /* dump memory mappings */
2383 void page_dump(FILE *f)
2385 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2386 "start", "end", "size", "prot");
2387 walk_memory_regions(f, dump_region);
2390 int page_get_flags(target_ulong address)
2392 PageDesc *p;
2394 p = page_find(address >> TARGET_PAGE_BITS);
2395 if (!p)
2396 return 0;
2397 return p->flags;
2400 /* Modify the flags of a page and invalidate the code if necessary.
2401 The flag PAGE_WRITE_ORG is positioned automatically depending
2402 on PAGE_WRITE. The mmap_lock should already be held. */
2403 void page_set_flags(target_ulong start, target_ulong end, int flags)
2405 target_ulong addr, len;
2407 /* This function should never be called with addresses outside the
2408 guest address space. If this assert fires, it probably indicates
2409 a missing call to h2g_valid. */
2410 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2411 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2412 #endif
2413 assert(start < end);
2415 start = start & TARGET_PAGE_MASK;
2416 end = TARGET_PAGE_ALIGN(end);
2418 if (flags & PAGE_WRITE) {
2419 flags |= PAGE_WRITE_ORG;
2422 for (addr = start, len = end - start;
2423 len != 0;
2424 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2425 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2427 /* If the write protection bit is set, then we invalidate
2428 the code inside. */
2429 if (!(p->flags & PAGE_WRITE) &&
2430 (flags & PAGE_WRITE) &&
2431 p->first_tb) {
2432 tb_invalidate_phys_page(addr, 0, NULL);
2434 p->flags = flags;
2438 int page_check_range(target_ulong start, target_ulong len, int flags)
2440 PageDesc *p;
2441 target_ulong end;
2442 target_ulong addr;
2444 /* This function should never be called with addresses outside the
2445 guest address space. If this assert fires, it probably indicates
2446 a missing call to h2g_valid. */
2447 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2448 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2449 #endif
2451 if (len == 0) {
2452 return 0;
2454 if (start + len - 1 < start) {
2455 /* We've wrapped around. */
2456 return -1;
2459 end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2460 start = start & TARGET_PAGE_MASK;
2462 for (addr = start, len = end - start;
2463 len != 0;
2464 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2465 p = page_find(addr >> TARGET_PAGE_BITS);
2466 if (!p)
2467 return -1;
2468 if (!(p->flags & PAGE_VALID))
2469 return -1;
2471 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2472 return -1;
2473 if (flags & PAGE_WRITE) {
2474 if (!(p->flags & PAGE_WRITE_ORG))
2475 return -1;
2476 /* unprotect the page if it was put read-only because it
2477 contains translated code */
2478 if (!(p->flags & PAGE_WRITE)) {
2479 if (!page_unprotect(addr, 0, NULL))
2480 return -1;
2482 return 0;
2485 return 0;
2488 /* called from signal handler: invalidate the code and unprotect the
2489 page. Return TRUE if the fault was successfully handled. */
2490 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2492 unsigned int prot;
2493 PageDesc *p;
2494 target_ulong host_start, host_end, addr;
2496 /* Technically this isn't safe inside a signal handler. However we
2497 know this only ever happens in a synchronous SEGV handler, so in
2498 practice it seems to be ok. */
2499 mmap_lock();
2501 p = page_find(address >> TARGET_PAGE_BITS);
2502 if (!p) {
2503 mmap_unlock();
2504 return 0;
2507 /* if the page was really writable, then we change its
2508 protection back to writable */
2509 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2510 host_start = address & qemu_host_page_mask;
2511 host_end = host_start + qemu_host_page_size;
2513 prot = 0;
2514 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2515 p = page_find(addr >> TARGET_PAGE_BITS);
2516 p->flags |= PAGE_WRITE;
2517 prot |= p->flags;
2519 /* and since the content will be modified, we must invalidate
2520 the corresponding translated code. */
2521 tb_invalidate_phys_page(addr, pc, puc);
2522 #ifdef DEBUG_TB_CHECK
2523 tb_invalidate_check(addr);
2524 #endif
2526 mprotect((void *)g2h(host_start), qemu_host_page_size,
2527 prot & PAGE_BITS);
2529 mmap_unlock();
2530 return 1;
2532 mmap_unlock();
2533 return 0;
2536 static inline void tlb_set_dirty(CPUState *env,
2537 unsigned long addr, target_ulong vaddr)
2540 #endif /* defined(CONFIG_USER_ONLY) */
2542 #if !defined(CONFIG_USER_ONLY)
2544 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2545 typedef struct subpage_t {
2546 MemoryRegion iomem;
2547 target_phys_addr_t base;
2548 uint16_t sub_section[TARGET_PAGE_SIZE];
2549 } subpage_t;
2551 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2552 uint16_t section);
2553 static subpage_t *subpage_init(target_phys_addr_t base);
2554 static void destroy_page_desc(uint16_t section_index)
2556 MemoryRegionSection *section = &phys_sections[section_index];
2557 MemoryRegion *mr = section->mr;
2559 if (mr->subpage) {
2560 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2561 memory_region_destroy(&subpage->iomem);
2562 g_free(subpage);
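/* Recursively tear down the physical page map that phys_page_set() builds:
   intermediate nodes recurse into their children, leaf entries pass their
   section index to destroy_page_desc() above so any subpage container is
   freed, and the node pointer is reset to PHYS_MAP_NODE_NIL. */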
2566 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2568 unsigned i;
2569 PhysPageEntry *p;
2571 if (lp->u.node == PHYS_MAP_NODE_NIL) {
2572 return;
2575 p = phys_map_nodes[lp->u.node];
2576 for (i = 0; i < L2_SIZE; ++i) {
2577 if (level > 0) {
2578 destroy_l2_mapping(&p[i], level - 1);
2579 } else {
2580 destroy_page_desc(p[i].u.leaf);
2583 lp->u.node = PHYS_MAP_NODE_NIL;
2586 static void destroy_all_mappings(void)
2588 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2589 phys_map_nodes_reset();
2592 static uint16_t phys_section_add(MemoryRegionSection *section)
2594 if (phys_sections_nb == phys_sections_nb_alloc) {
2595 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2596 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2597 phys_sections_nb_alloc);
2599 phys_sections[phys_sections_nb] = *section;
2600 return phys_sections_nb++;
2603 static void phys_sections_clear(void)
2605 phys_sections_nb = 0;
2608 /* register physical memory.
2609 For RAM, 'size' must be a multiple of the target page size.
2610 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2611 io memory page. The address used when calling the IO function is
2612 the offset from the start of the region, plus region_offset. Both
2613 start_addr and region_offset are rounded down to a page boundary
2614 before calculating this offset. This should not be a problem unless
2615 the low bits of start_addr and region_offset differ. */
2616 static void register_subpage(MemoryRegionSection *section)
2618 subpage_t *subpage;
2619 target_phys_addr_t base = section->offset_within_address_space
2620 & TARGET_PAGE_MASK;
2621 MemoryRegionSection existing = phys_page_find(base >> TARGET_PAGE_BITS);
2622 MemoryRegionSection subsection = {
2623 .offset_within_address_space = base,
2624 .size = TARGET_PAGE_SIZE,
2626 target_phys_addr_t start, end;
2628 assert(existing.mr->subpage || existing.mr == &io_mem_unassigned);
2630 if (!(existing.mr->subpage)) {
2631 subpage = subpage_init(base);
2632 subsection.mr = &subpage->iomem;
2633 phys_page_set(base >> TARGET_PAGE_BITS, phys_section_add(&subsection));
2634 } else {
2635 subpage = container_of(existing.mr, subpage_t, iomem);
2637 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2638 end = start + section->size;
2639 subpage_register(subpage, start, end, phys_section_add(section));
2643 static void register_multipage(MemoryRegionSection *section)
2645 target_phys_addr_t start_addr = section->offset_within_address_space;
2646 ram_addr_t size = section->size;
2647 target_phys_addr_t addr, end_addr;
2648 uint16_t section_index = phys_section_add(section);
2650 assert(size);
2652 end_addr = start_addr + (target_phys_addr_t)size;
2654 addr = start_addr;
2655 do {
2656 phys_page_set(addr >> TARGET_PAGE_BITS, section_index);
2657 addr += TARGET_PAGE_SIZE;
2658 } while (addr != end_addr);
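/* Split an incoming MemoryRegionSection into at most three pieces: an
   unaligned head and tail, registered as subpages so they can share a
   target page with other regions, and a page-aligned middle registered
   one full page at a time via register_multipage(). */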
2661 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2662 bool readonly)
2664 MemoryRegionSection now = *section, remain = *section;
2666 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2667 || (now.size < TARGET_PAGE_SIZE)) {
2668 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2669 - now.offset_within_address_space,
2670 now.size);
2671 register_subpage(&now);
2672 remain.size -= now.size;
2673 remain.offset_within_address_space += now.size;
2674 remain.offset_within_region += now.size;
2676 now = remain;
2677 now.size &= TARGET_PAGE_MASK;
2678 if (now.size) {
2679 register_multipage(&now);
2680 remain.size -= now.size;
2681 remain.offset_within_address_space += now.size;
2682 remain.offset_within_region += now.size;
2684 now = remain;
2685 if (now.size) {
2686 register_subpage(&now);
2691 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2693 if (kvm_enabled())
2694 kvm_coalesce_mmio_region(addr, size);
2697 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2699 if (kvm_enabled())
2700 kvm_uncoalesce_mmio_region(addr, size);
2703 void qemu_flush_coalesced_mmio_buffer(void)
2705 if (kvm_enabled())
2706 kvm_flush_coalesced_mmio_buffer();
2709 #if defined(__linux__) && !defined(TARGET_S390X)
2711 #include <sys/vfs.h>
2713 #define HUGETLBFS_MAGIC 0x958458f6
2715 static long gethugepagesize(const char *path)
2717 struct statfs fs;
2718 int ret;
2720 do {
2721 ret = statfs(path, &fs);
2722 } while (ret != 0 && errno == EINTR);
2724 if (ret != 0) {
2725 perror(path);
2726 return 0;
2729 if (fs.f_type != HUGETLBFS_MAGIC)
2730 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2732 return fs.f_bsize;
2735 static void *file_ram_alloc(RAMBlock *block,
2736 ram_addr_t memory,
2737 const char *path)
2739 char *filename;
2740 void *area;
2741 int fd;
2742 #ifdef MAP_POPULATE
2743 int flags;
2744 #endif
2745 unsigned long hpagesize;
2747 hpagesize = gethugepagesize(path);
2748 if (!hpagesize) {
2749 return NULL;
2752 if (memory < hpagesize) {
2753 return NULL;
2756 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2757 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2758 return NULL;
2761 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2762 return NULL;
2765 fd = mkstemp(filename);
2766 if (fd < 0) {
2767 perror("unable to create backing store for hugepages");
2768 free(filename);
2769 return NULL;
2771 unlink(filename);
2772 free(filename);
2774 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2777 * ftruncate is not supported by hugetlbfs in older
2778 * hosts, so don't bother bailing out on errors.
2779 * If anything goes wrong with it under other filesystems,
2780 * mmap will fail.
2782 if (ftruncate(fd, memory))
2783 perror("ftruncate");
2785 #ifdef MAP_POPULATE
2786 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2787 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2788 * to sidestep this quirk.
2790 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2791 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2792 #else
2793 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2794 #endif
2795 if (area == MAP_FAILED) {
2796 perror("file_ram_alloc: can't mmap RAM pages");
2797 close(fd);
2798 return (NULL);
2800 block->fd = fd;
2801 return area;
2803 #endif
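/* Choose a ram_addr_t offset for a new RAM block: scan the gaps between the
   existing blocks and return the start of the smallest gap that still fits
   'size' (best fit), aborting if no gap is large enough. */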
2805 static ram_addr_t find_ram_offset(ram_addr_t size)
2807 RAMBlock *block, *next_block;
2808 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2810 if (QLIST_EMPTY(&ram_list.blocks))
2811 return 0;
2813 QLIST_FOREACH(block, &ram_list.blocks, next) {
2814 ram_addr_t end, next = RAM_ADDR_MAX;
2816 end = block->offset + block->length;
2818 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2819 if (next_block->offset >= end) {
2820 next = MIN(next, next_block->offset);
2823 if (next - end >= size && next - end < mingap) {
2824 offset = end;
2825 mingap = next - end;
2829 if (offset == RAM_ADDR_MAX) {
2830 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2831 (uint64_t)size);
2832 abort();
2835 return offset;
2838 static ram_addr_t last_ram_offset(void)
2840 RAMBlock *block;
2841 ram_addr_t last = 0;
2843 QLIST_FOREACH(block, &ram_list.blocks, next)
2844 last = MAX(last, block->offset + block->length);
2846 return last;
2849 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2851 RAMBlock *new_block, *block;
2853 new_block = NULL;
2854 QLIST_FOREACH(block, &ram_list.blocks, next) {
2855 if (block->offset == addr) {
2856 new_block = block;
2857 break;
2860 assert(new_block);
2861 assert(!new_block->idstr[0]);
2863 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2864 char *id = dev->parent_bus->info->get_dev_path(dev);
2865 if (id) {
2866 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2867 g_free(id);
2870 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2872 QLIST_FOREACH(block, &ram_list.blocks, next) {
2873 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2874 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2875 new_block->idstr);
2876 abort();
2881 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2882 MemoryRegion *mr)
2884 RAMBlock *new_block;
2886 size = TARGET_PAGE_ALIGN(size);
2887 new_block = g_malloc0(sizeof(*new_block));
2889 new_block->mr = mr;
2890 new_block->offset = find_ram_offset(size);
2891 if (host) {
2892 new_block->host = host;
2893 new_block->flags |= RAM_PREALLOC_MASK;
2894 } else {
2895 if (mem_path) {
2896 #if defined (__linux__) && !defined(TARGET_S390X)
2897 new_block->host = file_ram_alloc(new_block, size, mem_path);
2898 if (!new_block->host) {
2899 new_block->host = qemu_vmalloc(size);
2900 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2902 #else
2903 fprintf(stderr, "-mem-path option unsupported\n");
2904 exit(1);
2905 #endif
2906 } else {
2907 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2908 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2909 a system-defined value, which is at least 256GB. Larger systems
2910 have larger values. We put the guest between the end of the data
2911 segment (system break) and this value. We use 32GB as a base to
2912 have enough room for the system break to grow. */
2913 new_block->host = mmap((void*)0x800000000, size,
2914 PROT_EXEC|PROT_READ|PROT_WRITE,
2915 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2916 if (new_block->host == MAP_FAILED) {
2917 fprintf(stderr, "Allocating RAM failed\n");
2918 abort();
2920 #else
2921 if (xen_enabled()) {
2922 xen_ram_alloc(new_block->offset, size, mr);
2923 } else {
2924 new_block->host = qemu_vmalloc(size);
2926 #endif
2927 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2930 new_block->length = size;
2932 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2934 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2935 last_ram_offset() >> TARGET_PAGE_BITS);
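    /* Mark every page of the new block dirty (all flags set) so its initial
       contents are treated as modified by all users of the dirty bitmap. */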
2936 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2937 0xff, size >> TARGET_PAGE_BITS);
2939 if (kvm_enabled())
2940 kvm_setup_guest_memory(new_block->host, size);
2942 return new_block->offset;
2945 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2947 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2950 void qemu_ram_free_from_ptr(ram_addr_t addr)
2952 RAMBlock *block;
2954 QLIST_FOREACH(block, &ram_list.blocks, next) {
2955 if (addr == block->offset) {
2956 QLIST_REMOVE(block, next);
2957 g_free(block);
2958 return;
2963 void qemu_ram_free(ram_addr_t addr)
2965 RAMBlock *block;
2967 QLIST_FOREACH(block, &ram_list.blocks, next) {
2968 if (addr == block->offset) {
2969 QLIST_REMOVE(block, next);
2970 if (block->flags & RAM_PREALLOC_MASK) {
2972 } else if (mem_path) {
2973 #if defined (__linux__) && !defined(TARGET_S390X)
2974 if (block->fd) {
2975 munmap(block->host, block->length);
2976 close(block->fd);
2977 } else {
2978 qemu_vfree(block->host);
2980 #else
2981 abort();
2982 #endif
2983 } else {
2984 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2985 munmap(block->host, block->length);
2986 #else
2987 if (xen_enabled()) {
2988 xen_invalidate_map_cache_entry(block->host);
2989 } else {
2990 qemu_vfree(block->host);
2992 #endif
2994 g_free(block);
2995 return;
3001 #ifndef _WIN32
3002 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3004 RAMBlock *block;
3005 ram_addr_t offset;
3006 int flags;
3007 void *area, *vaddr;
3009 QLIST_FOREACH(block, &ram_list.blocks, next) {
3010 offset = addr - block->offset;
3011 if (offset < block->length) {
3012 vaddr = block->host + offset;
3013 if (block->flags & RAM_PREALLOC_MASK) {
3015 } else {
3016 flags = MAP_FIXED;
3017 munmap(vaddr, length);
3018 if (mem_path) {
3019 #if defined(__linux__) && !defined(TARGET_S390X)
3020 if (block->fd) {
3021 #ifdef MAP_POPULATE
3022 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3023 MAP_PRIVATE;
3024 #else
3025 flags |= MAP_PRIVATE;
3026 #endif
3027 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3028 flags, block->fd, offset);
3029 } else {
3030 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3031 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3032 flags, -1, 0);
3034 #else
3035 abort();
3036 #endif
3037 } else {
3038 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3039 flags |= MAP_SHARED | MAP_ANONYMOUS;
3040 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3041 flags, -1, 0);
3042 #else
3043 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3044 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3045 flags, -1, 0);
3046 #endif
3048 if (area != vaddr) {
3049 fprintf(stderr, "Could not remap addr: "
3050 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3051 length, addr);
3052 exit(1);
3054 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3056 return;
3060 #endif /* !_WIN32 */
3062 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3063 With the exception of the softmmu code in this file, this should
3064 only be used for local memory (e.g. video ram) that the device owns,
3065 and knows it isn't going to access beyond the end of the block.
3067 It should not be used for general purpose DMA.
3068 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3070 void *qemu_get_ram_ptr(ram_addr_t addr)
3072 RAMBlock *block;
3074 QLIST_FOREACH(block, &ram_list.blocks, next) {
3075 if (addr - block->offset < block->length) {
3076 /* Move this entry to the start of the list. */
3077 if (block != QLIST_FIRST(&ram_list.blocks)) {
3078 QLIST_REMOVE(block, next);
3079 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3081 if (xen_enabled()) {
3082 /* We need to check if the requested address is in RAM
3083 * because we don't want to map the entire guest memory in QEMU.
3084 * In that case just map until the end of the page.
3086 if (block->offset == 0) {
3087 return xen_map_cache(addr, 0, 0);
3088 } else if (block->host == NULL) {
3089 block->host =
3090 xen_map_cache(block->offset, block->length, 1);
3093 return block->host + (addr - block->offset);
3097 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3098 abort();
3100 return NULL;
3103 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3104 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3106 void *qemu_safe_ram_ptr(ram_addr_t addr)
3108 RAMBlock *block;
3110 QLIST_FOREACH(block, &ram_list.blocks, next) {
3111 if (addr - block->offset < block->length) {
3112 if (xen_enabled()) {
3113 /* We need to check if the requested address is in RAM
3114 * because we don't want to map the entire guest memory in QEMU.
3115 * In that case just map until the end of the page.
3117 if (block->offset == 0) {
3118 return xen_map_cache(addr, 0, 0);
3119 } else if (block->host == NULL) {
3120 block->host =
3121 xen_map_cache(block->offset, block->length, 1);
3124 return block->host + (addr - block->offset);
3128 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3129 abort();
3131 return NULL;
3134 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3135 * but takes a size argument */
3136 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3138 if (*size == 0) {
3139 return NULL;
3141 if (xen_enabled()) {
3142 return xen_map_cache(addr, *size, 1);
3143 } else {
3144 RAMBlock *block;
3146 QLIST_FOREACH(block, &ram_list.blocks, next) {
3147 if (addr - block->offset < block->length) {
3148 if (addr - block->offset + *size > block->length)
3149 *size = block->length - addr + block->offset;
3150 return block->host + (addr - block->offset);
3154 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3155 abort();
3159 void qemu_put_ram_ptr(void *addr)
3161 trace_qemu_put_ram_ptr(addr);
3164 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3166 RAMBlock *block;
3167 uint8_t *host = ptr;
3169 if (xen_enabled()) {
3170 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3171 return 0;
3174 QLIST_FOREACH(block, &ram_list.blocks, next) {
3175 /* This case appears when the block is not mapped. */
3176 if (block->host == NULL) {
3177 continue;
3179 if (host - block->host < block->length) {
3180 *ram_addr = block->offset + (host - block->host);
3181 return 0;
3185 return -1;
3188 /* Some of the softmmu routines need to translate from a host pointer
3189 (typically a TLB entry) back to a ram offset. */
3190 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3192 ram_addr_t ram_addr;
3194 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3195 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3196 abort();
3198 return ram_addr;
3201 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3202 unsigned size)
3204 #ifdef DEBUG_UNASSIGNED
3205 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3206 #endif
3207 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3208 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3209 #endif
3210 return 0;
3213 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3214 uint64_t val, unsigned size)
3216 #ifdef DEBUG_UNASSIGNED
3217 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3218 #endif
3219 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3220 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3221 #endif
3224 static const MemoryRegionOps unassigned_mem_ops = {
3225 .read = unassigned_mem_read,
3226 .write = unassigned_mem_write,
3227 .endianness = DEVICE_NATIVE_ENDIAN,
3230 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3231 unsigned size)
3233 abort();
3236 static void error_mem_write(void *opaque, target_phys_addr_t addr,
3237 uint64_t value, unsigned size)
3239 abort();
3242 static const MemoryRegionOps error_mem_ops = {
3243 .read = error_mem_read,
3244 .write = error_mem_write,
3245 .endianness = DEVICE_NATIVE_ENDIAN,
3248 static const MemoryRegionOps rom_mem_ops = {
3249 .read = error_mem_read,
3250 .write = unassigned_mem_write,
3251 .endianness = DEVICE_NATIVE_ENDIAN,
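/* Write handler reached through TLB_NOTDIRTY for RAM pages whose dirty bits
   are not all set, typically because they still contain translated code:
   it invalidates any TBs on the page, performs the store, updates the dirty
   flags, and re-enables the fast write path once the page is fully dirty. */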
3254 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3255 uint64_t val, unsigned size)
3257 int dirty_flags;
3258 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3259 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3260 #if !defined(CONFIG_USER_ONLY)
3261 tb_invalidate_phys_page_fast(ram_addr, size);
3262 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3263 #endif
3265 switch (size) {
3266 case 1:
3267 stb_p(qemu_get_ram_ptr(ram_addr), val);
3268 break;
3269 case 2:
3270 stw_p(qemu_get_ram_ptr(ram_addr), val);
3271 break;
3272 case 4:
3273 stl_p(qemu_get_ram_ptr(ram_addr), val);
3274 break;
3275 default:
3276 abort();
3278 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3279 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3280 /* we remove the notdirty callback only if the code has been
3281 flushed */
3282 if (dirty_flags == 0xff)
3283 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3286 static const MemoryRegionOps notdirty_mem_ops = {
3287 .read = error_mem_read,
3288 .write = notdirty_mem_write,
3289 .endianness = DEVICE_NATIVE_ENDIAN,
3292 /* Generate a debug exception if a watchpoint has been hit. */
3293 static void check_watchpoint(int offset, int len_mask, int flags)
3295 CPUState *env = cpu_single_env;
3296 target_ulong pc, cs_base;
3297 TranslationBlock *tb;
3298 target_ulong vaddr;
3299 CPUWatchpoint *wp;
3300 int cpu_flags;
3302 if (env->watchpoint_hit) {
3303 /* We re-entered the check after replacing the TB. Now raise
3304 * the debug interrupt so that it will trigger after the
3305 * current instruction. */
3306 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3307 return;
3309 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3310 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3311 if ((vaddr == (wp->vaddr & len_mask) ||
3312 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3313 wp->flags |= BP_WATCHPOINT_HIT;
3314 if (!env->watchpoint_hit) {
3315 env->watchpoint_hit = wp;
3316 tb = tb_find_pc(env->mem_io_pc);
3317 if (!tb) {
3318 cpu_abort(env, "check_watchpoint: could not find TB for "
3319 "pc=%p", (void *)env->mem_io_pc);
3321 cpu_restore_state(tb, env, env->mem_io_pc);
3322 tb_phys_invalidate(tb, -1);
3323 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3324 env->exception_index = EXCP_DEBUG;
3325 } else {
3326 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3327 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3329 cpu_resume_from_signal(env, NULL);
3331 } else {
3332 wp->flags &= ~BP_WATCHPOINT_HIT;
3337 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3338 so these check for a hit then pass through to the normal out-of-line
3339 phys routines. */
3340 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3341 unsigned size)
3343 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3344 switch (size) {
3345 case 1: return ldub_phys(addr);
3346 case 2: return lduw_phys(addr);
3347 case 4: return ldl_phys(addr);
3348 default: abort();
3352 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3353 uint64_t val, unsigned size)
3355 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3356 switch (size) {
3357 case 1: stb_phys(addr, val); break;
3358 case 2: stw_phys(addr, val); break;
3359 case 4: stl_phys(addr, val); break;
3360 default: abort();
3364 static const MemoryRegionOps watch_mem_ops = {
3365 .read = watch_mem_read,
3366 .write = watch_mem_write,
3367 .endianness = DEVICE_NATIVE_ENDIAN,
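/* Subpage dispatch: a subpage_t covers a single target page shared by
   several MemoryRegionSections.  The byte offset within the page indexes
   sub_section[] to find the owning section, and the access is forwarded to
   that section's MemoryRegion at a correspondingly adjusted address. */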
3370 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3371 unsigned len)
3373 subpage_t *mmio = opaque;
3374 unsigned int idx = SUBPAGE_IDX(addr);
3375 MemoryRegionSection *section;
3376 #if defined(DEBUG_SUBPAGE)
3377 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3378 mmio, len, addr, idx);
3379 #endif
3381 section = &phys_sections[mmio->sub_section[idx]];
3382 addr += mmio->base;
3383 addr -= section->offset_within_address_space;
3384 addr += section->offset_within_region;
3385 return io_mem_read(section->mr->ram_addr, addr, len);
3388 static void subpage_write(void *opaque, target_phys_addr_t addr,
3389 uint64_t value, unsigned len)
3391 subpage_t *mmio = opaque;
3392 unsigned int idx = SUBPAGE_IDX(addr);
3393 MemoryRegionSection *section;
3394 #if defined(DEBUG_SUBPAGE)
3395 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3396 " idx %d value %"PRIx64"\n",
3397 __func__, mmio, len, addr, idx, value);
3398 #endif
3400 section = &phys_sections[mmio->sub_section[idx]];
3401 addr += mmio->base;
3402 addr -= section->offset_within_address_space;
3403 addr += section->offset_within_region;
3404 io_mem_write(section->mr->ram_addr, addr, value, len);
3407 static const MemoryRegionOps subpage_ops = {
3408 .read = subpage_read,
3409 .write = subpage_write,
3410 .endianness = DEVICE_NATIVE_ENDIAN,
3413 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3414 unsigned size)
3416 ram_addr_t raddr = addr;
3417 void *ptr = qemu_get_ram_ptr(raddr);
3418 switch (size) {
3419 case 1: return ldub_p(ptr);
3420 case 2: return lduw_p(ptr);
3421 case 4: return ldl_p(ptr);
3422 default: abort();
3426 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3427 uint64_t value, unsigned size)
3429 ram_addr_t raddr = addr;
3430 void *ptr = qemu_get_ram_ptr(raddr);
3431 switch (size) {
3432 case 1: return stb_p(ptr, value);
3433 case 2: return stw_p(ptr, value);
3434 case 4: return stl_p(ptr, value);
3435 default: abort();
3439 static const MemoryRegionOps subpage_ram_ops = {
3440 .read = subpage_ram_read,
3441 .write = subpage_ram_write,
3442 .endianness = DEVICE_NATIVE_ENDIAN,
3445 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3446 uint16_t section)
3448 int idx, eidx;
3450 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3451 return -1;
3452 idx = SUBPAGE_IDX(start);
3453 eidx = SUBPAGE_IDX(end);
3454 #if defined(DEBUG_SUBPAGE)
3455 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3456 mmio, start, end, idx, eidx, memory);
3457 #endif
3458 if (memory_region_is_ram(phys_sections[section].mr)) {
3459 MemoryRegionSection new_section = phys_sections[section];
3460 new_section.mr = &io_mem_subpage_ram;
3461 section = phys_section_add(&new_section);
3463 for (; idx <= eidx; idx++) {
3464 mmio->sub_section[idx] = section;
3467 return 0;
3470 static subpage_t *subpage_init(target_phys_addr_t base)
3472 subpage_t *mmio;
3474 mmio = g_malloc0(sizeof(subpage_t));
3476 mmio->base = base;
3477 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3478 "subpage", TARGET_PAGE_SIZE);
3479 mmio->iomem.subpage = true;
3480 #if defined(DEBUG_SUBPAGE)
3481 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3482 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3483 #endif
3484 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3486 return mmio;
3489 static int get_free_io_mem_idx(void)
3491 int i;
3493 for (i = 0; i < IO_MEM_NB_ENTRIES; i++)
3494 if (!io_mem_used[i]) {
3495 io_mem_used[i] = 1;
3496 return i;
3498 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3499 return -1;
3502 /* mem_read and mem_write are arrays of functions containing the
3503 function to access byte (index 0), word (index 1) and dword (index
3504 2). Functions can be omitted with a NULL function pointer.
3505 If io_index is non zero, the corresponding io zone is
3506 modified. If it is zero, a new io zone is allocated. The return
3507 value can be used with cpu_register_physical_memory(). (-1) is
3508 returned if error. */
3509 static int cpu_register_io_memory_fixed(int io_index, MemoryRegion *mr)
3511 if (io_index <= 0) {
3512 io_index = get_free_io_mem_idx();
3513 if (io_index == -1)
3514 return io_index;
3515 } else {
3516 if (io_index >= IO_MEM_NB_ENTRIES)
3517 return -1;
3520 io_mem_region[io_index] = mr;
3522 return io_index;
3525 int cpu_register_io_memory(MemoryRegion *mr)
3527 return cpu_register_io_memory_fixed(0, mr);
3530 void cpu_unregister_io_memory(int io_index)
3532 io_mem_region[io_index] = NULL;
3533 io_mem_used[io_index] = 0;
3536 static uint16_t dummy_section(MemoryRegion *mr)
3538 MemoryRegionSection section = {
3539 .mr = mr,
3540 .offset_within_address_space = 0,
3541 .offset_within_region = 0,
3542 .size = UINT64_MAX,
3545 return phys_section_add(&section);
3548 static void io_mem_init(void)
3550 int i;
3552 /* Must be first: */
3553 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3554 assert(io_mem_ram.ram_addr == 0);
3555 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3556 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3557 "unassigned", UINT64_MAX);
3558 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3559 "notdirty", UINT64_MAX);
3560 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3561 "subpage-ram", UINT64_MAX);
3562 for (i = 0; i < 5; i++)
3563 io_mem_used[i] = 1;
3565 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3566 "watch", UINT64_MAX);
3569 static void core_begin(MemoryListener *listener)
3571 destroy_all_mappings();
3572 phys_sections_clear();
3573 phys_map.u.node = PHYS_MAP_NODE_NIL;
3574 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3577 static void core_commit(MemoryListener *listener)
3579 CPUState *env;
3581 /* since each CPU stores ram addresses in its TLB cache, we must
3582 reset the modified entries */
3583 /* XXX: slow ! */
3584 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3585 tlb_flush(env, 1);
3589 static void core_region_add(MemoryListener *listener,
3590 MemoryRegionSection *section)
3592 cpu_register_physical_memory_log(section, section->readonly);
3595 static void core_region_del(MemoryListener *listener,
3596 MemoryRegionSection *section)
3600 static void core_region_nop(MemoryListener *listener,
3601 MemoryRegionSection *section)
3603 cpu_register_physical_memory_log(section, section->readonly);
3606 static void core_log_start(MemoryListener *listener,
3607 MemoryRegionSection *section)
3611 static void core_log_stop(MemoryListener *listener,
3612 MemoryRegionSection *section)
3616 static void core_log_sync(MemoryListener *listener,
3617 MemoryRegionSection *section)
3621 static void core_log_global_start(MemoryListener *listener)
3623 cpu_physical_memory_set_dirty_tracking(1);
3626 static void core_log_global_stop(MemoryListener *listener)
3628 cpu_physical_memory_set_dirty_tracking(0);
3631 static void core_eventfd_add(MemoryListener *listener,
3632 MemoryRegionSection *section,
3633 bool match_data, uint64_t data, int fd)
3637 static void core_eventfd_del(MemoryListener *listener,
3638 MemoryRegionSection *section,
3639 bool match_data, uint64_t data, int fd)
3643 static void io_begin(MemoryListener *listener)
3647 static void io_commit(MemoryListener *listener)
3651 static void io_region_add(MemoryListener *listener,
3652 MemoryRegionSection *section)
3654 iorange_init(&section->mr->iorange, &memory_region_iorange_ops,
3655 section->offset_within_address_space, section->size);
3656 ioport_register(&section->mr->iorange);
3659 static void io_region_del(MemoryListener *listener,
3660 MemoryRegionSection *section)
3662 isa_unassign_ioport(section->offset_within_address_space, section->size);
3665 static void io_region_nop(MemoryListener *listener,
3666 MemoryRegionSection *section)
3670 static void io_log_start(MemoryListener *listener,
3671 MemoryRegionSection *section)
3675 static void io_log_stop(MemoryListener *listener,
3676 MemoryRegionSection *section)
3680 static void io_log_sync(MemoryListener *listener,
3681 MemoryRegionSection *section)
3685 static void io_log_global_start(MemoryListener *listener)
3689 static void io_log_global_stop(MemoryListener *listener)
3693 static void io_eventfd_add(MemoryListener *listener,
3694 MemoryRegionSection *section,
3695 bool match_data, uint64_t data, int fd)
3699 static void io_eventfd_del(MemoryListener *listener,
3700 MemoryRegionSection *section,
3701 bool match_data, uint64_t data, int fd)
3705 static MemoryListener core_memory_listener = {
3706 .begin = core_begin,
3707 .commit = core_commit,
3708 .region_add = core_region_add,
3709 .region_del = core_region_del,
3710 .region_nop = core_region_nop,
3711 .log_start = core_log_start,
3712 .log_stop = core_log_stop,
3713 .log_sync = core_log_sync,
3714 .log_global_start = core_log_global_start,
3715 .log_global_stop = core_log_global_stop,
3716 .eventfd_add = core_eventfd_add,
3717 .eventfd_del = core_eventfd_del,
3718 .priority = 0,
3721 static MemoryListener io_memory_listener = {
3722 .begin = io_begin,
3723 .commit = io_commit,
3724 .region_add = io_region_add,
3725 .region_del = io_region_del,
3726 .region_nop = io_region_nop,
3727 .log_start = io_log_start,
3728 .log_stop = io_log_stop,
3729 .log_sync = io_log_sync,
3730 .log_global_start = io_log_global_start,
3731 .log_global_stop = io_log_global_stop,
3732 .eventfd_add = io_eventfd_add,
3733 .eventfd_del = io_eventfd_del,
3734 .priority = 0,
3737 static void memory_map_init(void)
3739 system_memory = g_malloc(sizeof(*system_memory));
3740 memory_region_init(system_memory, "system", INT64_MAX);
3741 set_system_memory_map(system_memory);
3743 system_io = g_malloc(sizeof(*system_io));
3744 memory_region_init(system_io, "io", 65536);
3745 set_system_io_map(system_io);
3747 memory_listener_register(&core_memory_listener, system_memory);
3748 memory_listener_register(&io_memory_listener, system_io);
3751 MemoryRegion *get_system_memory(void)
3753 return system_memory;
3756 MemoryRegion *get_system_io(void)
3758 return system_io;
3761 #endif /* !defined(CONFIG_USER_ONLY) */
3763 /* physical memory access (slow version, mainly for debug) */
3764 #if defined(CONFIG_USER_ONLY)
3765 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3766 uint8_t *buf, int len, int is_write)
3768 int l, flags;
3769 target_ulong page;
3770 void * p;
3772 while (len > 0) {
3773 page = addr & TARGET_PAGE_MASK;
3774 l = (page + TARGET_PAGE_SIZE) - addr;
3775 if (l > len)
3776 l = len;
3777 flags = page_get_flags(page);
3778 if (!(flags & PAGE_VALID))
3779 return -1;
3780 if (is_write) {
3781 if (!(flags & PAGE_WRITE))
3782 return -1;
3783 /* XXX: this code should not depend on lock_user */
3784 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3785 return -1;
3786 memcpy(p, buf, l);
3787 unlock_user(p, addr, l);
3788 } else {
3789 if (!(flags & PAGE_READ))
3790 return -1;
3791 /* XXX: this code should not depend on lock_user */
3792 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3793 return -1;
3794 memcpy(buf, p, l);
3795 unlock_user(p, addr, 0);
3797 len -= l;
3798 buf += l;
3799 addr += l;
3801 return 0;
3804 #else
3805 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3806 int len, int is_write)
3808 int l, io_index;
3809 uint8_t *ptr;
3810 uint32_t val;
3811 target_phys_addr_t page;
3812 MemoryRegionSection section;
3814 while (len > 0) {
3815 page = addr & TARGET_PAGE_MASK;
3816 l = (page + TARGET_PAGE_SIZE) - addr;
3817 if (l > len)
3818 l = len;
3819 section = phys_page_find(page >> TARGET_PAGE_BITS);
3821 if (is_write) {
3822 if (!memory_region_is_ram(section.mr)) {
3823 target_phys_addr_t addr1;
3824 io_index = memory_region_get_ram_addr(section.mr)
3825 & (IO_MEM_NB_ENTRIES - 1);
3826 addr1 = (addr & ~TARGET_PAGE_MASK)
3827 + section.offset_within_region;
3828 /* XXX: could force cpu_single_env to NULL to avoid
3829 potential bugs */
3830 if (l >= 4 && ((addr1 & 3) == 0)) {
3831 /* 32 bit write access */
3832 val = ldl_p(buf);
3833 io_mem_write(io_index, addr1, val, 4);
3834 l = 4;
3835 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3836 /* 16 bit write access */
3837 val = lduw_p(buf);
3838 io_mem_write(io_index, addr1, val, 2);
3839 l = 2;
3840 } else {
3841 /* 8 bit write access */
3842 val = ldub_p(buf);
3843 io_mem_write(io_index, addr1, val, 1);
3844 l = 1;
3846 } else if (!section.readonly) {
3847 ram_addr_t addr1;
3848 addr1 = (memory_region_get_ram_addr(section.mr)
3849 + section.offset_within_region)
3850 | (addr & ~TARGET_PAGE_MASK);
3851 /* RAM case */
3852 ptr = qemu_get_ram_ptr(addr1);
3853 memcpy(ptr, buf, l);
3854 if (!cpu_physical_memory_is_dirty(addr1)) {
3855 /* invalidate code */
3856 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3857 /* set dirty bit */
3858 cpu_physical_memory_set_dirty_flags(
3859 addr1, (0xff & ~CODE_DIRTY_FLAG));
3861 qemu_put_ram_ptr(ptr);
3863 } else {
3864 if (!is_ram_rom_romd(&section)) {
3865 target_phys_addr_t addr1;
3866 /* I/O case */
3867 io_index = memory_region_get_ram_addr(section.mr)
3868 & (IO_MEM_NB_ENTRIES - 1);
3869 addr1 = (addr & ~TARGET_PAGE_MASK)
3870 + section.offset_within_region;
3871 if (l >= 4 && ((addr1 & 3) == 0)) {
3872 /* 32 bit read access */
3873 val = io_mem_read(io_index, addr1, 4);
3874 stl_p(buf, val);
3875 l = 4;
3876 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3877 /* 16 bit read access */
3878 val = io_mem_read(io_index, addr1, 2);
3879 stw_p(buf, val);
3880 l = 2;
3881 } else {
3882 /* 8 bit read access */
3883 val = io_mem_read(io_index, addr1, 1);
3884 stb_p(buf, val);
3885 l = 1;
3887 } else {
3888 /* RAM case */
3889 ptr = qemu_get_ram_ptr(section.mr->ram_addr
3890 + section.offset_within_region);
3891 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3892 qemu_put_ram_ptr(ptr);
3895 len -= l;
3896 buf += l;
3897 addr += l;
3901 /* used for ROM loading: can write in RAM and ROM */
3902 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3903 const uint8_t *buf, int len)
3905 int l;
3906 uint8_t *ptr;
3907 target_phys_addr_t page;
3908 MemoryRegionSection section;
3910 while (len > 0) {
3911 page = addr & TARGET_PAGE_MASK;
3912 l = (page + TARGET_PAGE_SIZE) - addr;
3913 if (l > len)
3914 l = len;
3915 section = phys_page_find(page >> TARGET_PAGE_BITS);
3917 if (!is_ram_rom_romd(&section)) {
3918 /* do nothing */
3919 } else {
3920 unsigned long addr1;
3921 addr1 = (memory_region_get_ram_addr(section.mr)
3922 + section.offset_within_region)
3923 + (addr & ~TARGET_PAGE_MASK);
3924 /* ROM/RAM case */
3925 ptr = qemu_get_ram_ptr(addr1);
3926 memcpy(ptr, buf, l);
3927 qemu_put_ram_ptr(ptr);
3929 len -= l;
3930 buf += l;
3931 addr += l;
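/* A single statically allocated bounce buffer backs cpu_physical_memory_map()
   when the target is not directly mappable RAM (e.g. MMIO).  Only one such
   mapping can be outstanding at a time; other callers come up empty and can
   register a MapClient callback to be told when the buffer is free again. */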
3935 typedef struct {
3936 void *buffer;
3937 target_phys_addr_t addr;
3938 target_phys_addr_t len;
3939 } BounceBuffer;
3941 static BounceBuffer bounce;
3943 typedef struct MapClient {
3944 void *opaque;
3945 void (*callback)(void *opaque);
3946 QLIST_ENTRY(MapClient) link;
3947 } MapClient;
3949 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3950 = QLIST_HEAD_INITIALIZER(map_client_list);
3952 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3954 MapClient *client = g_malloc(sizeof(*client));
3956 client->opaque = opaque;
3957 client->callback = callback;
3958 QLIST_INSERT_HEAD(&map_client_list, client, link);
3959 return client;
3962 void cpu_unregister_map_client(void *_client)
3964 MapClient *client = (MapClient *)_client;
3966 QLIST_REMOVE(client, link);
3967 g_free(client);
3970 static void cpu_notify_map_clients(void)
3972 MapClient *client;
3974 while (!QLIST_EMPTY(&map_client_list)) {
3975 client = QLIST_FIRST(&map_client_list);
3976 client->callback(client->opaque);
3977 cpu_unregister_map_client(client);
3981 /* Map a physical memory region into a host virtual address.
3982 * May map a subset of the requested range, given by and returned in *plen.
3983 * May return NULL if resources needed to perform the mapping are exhausted.
3984 * Use only for reads OR writes - not for read-modify-write operations.
3985 * Use cpu_register_map_client() to know when retrying the map operation is
3986 * likely to succeed.
3988 void *cpu_physical_memory_map(target_phys_addr_t addr,
3989 target_phys_addr_t *plen,
3990 int is_write)
3992 target_phys_addr_t len = *plen;
3993 target_phys_addr_t todo = 0;
3994 int l;
3995 target_phys_addr_t page;
3996 MemoryRegionSection section;
3997 ram_addr_t raddr = RAM_ADDR_MAX;
3998 ram_addr_t rlen;
3999 void *ret;
4001 while (len > 0) {
4002 page = addr & TARGET_PAGE_MASK;
4003 l = (page + TARGET_PAGE_SIZE) - addr;
4004 if (l > len)
4005 l = len;
4006 section = phys_page_find(page >> TARGET_PAGE_BITS);
4008 if (!(memory_region_is_ram(section.mr) && !section.readonly)) {
4009 if (todo || bounce.buffer) {
4010 break;
4012 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4013 bounce.addr = addr;
4014 bounce.len = l;
4015 if (!is_write) {
4016 cpu_physical_memory_read(addr, bounce.buffer, l);
4019 *plen = l;
4020 return bounce.buffer;
4022 if (!todo) {
4023 raddr = memory_region_get_ram_addr(section.mr)
4024 + section.offset_within_region
4025 + (addr & ~TARGET_PAGE_MASK);
4028 len -= l;
4029 addr += l;
4030 todo += l;
4032 rlen = todo;
4033 ret = qemu_ram_ptr_length(raddr, &rlen);
4034 *plen = rlen;
4035 return ret;
4038 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4039 * Will also mark the memory as dirty if is_write == 1. access_len gives
4040 * the amount of memory that was actually read or written by the caller.
4042 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4043 int is_write, target_phys_addr_t access_len)
4045 if (buffer != bounce.buffer) {
4046 if (is_write) {
4047 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4048 while (access_len) {
4049 unsigned l;
4050 l = TARGET_PAGE_SIZE;
4051 if (l > access_len)
4052 l = access_len;
4053 if (!cpu_physical_memory_is_dirty(addr1)) {
4054 /* invalidate code */
4055 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4056 /* set dirty bit */
4057 cpu_physical_memory_set_dirty_flags(
4058 addr1, (0xff & ~CODE_DIRTY_FLAG));
4060 addr1 += l;
4061 access_len -= l;
4064 if (xen_enabled()) {
4065 xen_invalidate_map_cache_entry(buffer);
4067 return;
4069 if (is_write) {
4070 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4072 qemu_vfree(bounce.buffer);
4073 bounce.buffer = NULL;
4074 cpu_notify_map_clients();
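/* Illustrative use of the map/unmap pair above (a sketch only, not part of
 * the build; 'sg_addr', 'sg_len' and 'retry_cb' are made-up names):
 *
 *     target_phys_addr_t plen = sg_len;
 *     void *host = cpu_physical_memory_map(sg_addr, &plen, is_write);
 *     if (host) {
 *         ... transfer up to plen bytes through 'host' ...
 *         cpu_physical_memory_unmap(host, plen, is_write, plen);
 *     } else {
 *         cpu_register_map_client(opaque, retry_cb);
 *     }
 *
 * plen may come back smaller than requested, so callers must be prepared
 * to loop over the remainder. */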
4077 /* warning: addr must be aligned */
4078 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4079 enum device_endian endian)
4081 int io_index;
4082 uint8_t *ptr;
4083 uint32_t val;
4084 MemoryRegionSection section;
4086 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4088 if (!is_ram_rom_romd(&section)) {
4089 /* I/O case */
4090 io_index = memory_region_get_ram_addr(section.mr)
4091 & (IO_MEM_NB_ENTRIES - 1);
4092 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4093 val = io_mem_read(io_index, addr, 4);
4094 #if defined(TARGET_WORDS_BIGENDIAN)
4095 if (endian == DEVICE_LITTLE_ENDIAN) {
4096 val = bswap32(val);
4098 #else
4099 if (endian == DEVICE_BIG_ENDIAN) {
4100 val = bswap32(val);
4102 #endif
4103 } else {
4104 /* RAM case */
4105 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4106 & TARGET_PAGE_MASK)
4107 + section.offset_within_region) +
4108 (addr & ~TARGET_PAGE_MASK);
4109 switch (endian) {
4110 case DEVICE_LITTLE_ENDIAN:
4111 val = ldl_le_p(ptr);
4112 break;
4113 case DEVICE_BIG_ENDIAN:
4114 val = ldl_be_p(ptr);
4115 break;
4116 default:
4117 val = ldl_p(ptr);
4118 break;
4121 return val;
4124 uint32_t ldl_phys(target_phys_addr_t addr)
4126 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4129 uint32_t ldl_le_phys(target_phys_addr_t addr)
4131 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4134 uint32_t ldl_be_phys(target_phys_addr_t addr)
4136 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4139 /* warning: addr must be aligned */
4140 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4141 enum device_endian endian)
4143 int io_index;
4144 uint8_t *ptr;
4145 uint64_t val;
4146 MemoryRegionSection section;
4148 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4150 if (!is_ram_rom_romd(&section)) {
4151 /* I/O case */
4152 io_index = memory_region_get_ram_addr(section.mr)
4153 & (IO_MEM_NB_ENTRIES - 1);
4154 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4156 /* XXX This is broken when device endian != cpu endian.
4157 Fix and add "endian" variable check */
4158 #ifdef TARGET_WORDS_BIGENDIAN
4159 val = io_mem_read(io_index, addr, 4) << 32;
4160 val |= io_mem_read(io_index, addr + 4, 4);
4161 #else
4162 val = io_mem_read(io_index, addr, 4);
4163 val |= io_mem_read(io_index, addr + 4, 4) << 32;
4164 #endif
4165 } else {
4166 /* RAM case */
4167 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4168 & TARGET_PAGE_MASK)
4169 + section.offset_within_region)
4170 + (addr & ~TARGET_PAGE_MASK);
4171 switch (endian) {
4172 case DEVICE_LITTLE_ENDIAN:
4173 val = ldq_le_p(ptr);
4174 break;
4175 case DEVICE_BIG_ENDIAN:
4176 val = ldq_be_p(ptr);
4177 break;
4178 default:
4179 val = ldq_p(ptr);
4180 break;
4183 return val;
4186 uint64_t ldq_phys(target_phys_addr_t addr)
4188 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4191 uint64_t ldq_le_phys(target_phys_addr_t addr)
4193 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4196 uint64_t ldq_be_phys(target_phys_addr_t addr)
4198 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4201 /* XXX: optimize */
4202 uint32_t ldub_phys(target_phys_addr_t addr)
4204 uint8_t val;
4205 cpu_physical_memory_read(addr, &val, 1);
4206 return val;
4209 /* warning: addr must be aligned */
4210 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4211 enum device_endian endian)
4213 int io_index;
4214 uint8_t *ptr;
4215 uint64_t val;
4216 MemoryRegionSection section;
4218 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4220 if (!is_ram_rom_romd(&section)) {
4221 /* I/O case */
4222 io_index = memory_region_get_ram_addr(section.mr)
4223 & (IO_MEM_NB_ENTRIES - 1);
4224 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4225 val = io_mem_read(io_index, addr, 2);
4226 #if defined(TARGET_WORDS_BIGENDIAN)
4227 if (endian == DEVICE_LITTLE_ENDIAN) {
4228 val = bswap16(val);
4229 }
4230 #else
4231 if (endian == DEVICE_BIG_ENDIAN) {
4232 val = bswap16(val);
4233 }
4234 #endif
4235 } else {
4236 /* RAM case */
4237 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4238 & TARGET_PAGE_MASK)
4239 + section.offset_within_region)
4240 + (addr & ~TARGET_PAGE_MASK);
4241 switch (endian) {
4242 case DEVICE_LITTLE_ENDIAN:
4243 val = lduw_le_p(ptr);
4244 break;
4245 case DEVICE_BIG_ENDIAN:
4246 val = lduw_be_p(ptr);
4247 break;
4248 default:
4249 val = lduw_p(ptr);
4250 break;
4251 }
4252 }
4253 return val;
4254 }
4256 uint32_t lduw_phys(target_phys_addr_t addr)
4257 {
4258 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4259 }
4261 uint32_t lduw_le_phys(target_phys_addr_t addr)
4262 {
4263 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4264 }
4266 uint32_t lduw_be_phys(target_phys_addr_t addr)
4267 {
4268 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4269 }
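/* Illustrative sketch, not part of the original file: a 16-bit value stored
   big-endian in guest memory (network byte order, say) is read with
   lduw_be_phys() so the result is independent of TARGET_WORDS_BIGENDIAN.
   The header layout below is invented for the example. */
static inline uint32_t example_lduw_be_port(target_phys_addr_t hdr_addr)
{
    /* hypothetical big-endian 16-bit field at byte offset 2 */
    return lduw_be_phys(hdr_addr + 2);
}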
4271 /* warning: addr must be aligned. The RAM page is not marked as dirty
4272 and the code inside it is not invalidated. This is useful when the dirty
4273 bits are used to track modified PTEs */
4274 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4275 {
4276 int io_index;
4277 uint8_t *ptr;
4278 MemoryRegionSection section;
4280 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4282 if (!memory_region_is_ram(section.mr) || section.readonly) {
4283 if (memory_region_is_ram(section.mr)) {
4284 io_index = io_mem_rom.ram_addr;
4285 } else {
4286 io_index = memory_region_get_ram_addr(section.mr);
4287 }
4288 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4289 io_mem_write(io_index, addr, val, 4);
4290 } else {
4291 unsigned long addr1 = (memory_region_get_ram_addr(section.mr)
4292 & TARGET_PAGE_MASK)
4293 + section.offset_within_region
4294 + (addr & ~TARGET_PAGE_MASK);
4295 ptr = qemu_get_ram_ptr(addr1);
4296 stl_p(ptr, val);
4298 if (unlikely(in_migration)) {
4299 if (!cpu_physical_memory_is_dirty(addr1)) {
4300 /* invalidate code */
4301 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4302 /* set dirty bit */
4303 cpu_physical_memory_set_dirty_flags(
4304 addr1, (0xff & ~CODE_DIRTY_FLAG));
4305 }
4306 }
4307 }
4308 }
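/* Illustrative sketch, not part of the original file: a target MMU helper
   that records an "accessed" bit in a guest page-table entry can use
   stl_phys_notdirty() so that this bookkeeping write neither sets the dirty
   flags nor invalidates translated code on the page.  The PTE layout and bit
   value below are made up. */
static inline void example_mark_pte_accessed(target_phys_addr_t pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    stl_phys_notdirty(pte_addr, pte | 0x20);  /* hypothetical accessed bit */
}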
4310 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4311 {
4312 int io_index;
4313 uint8_t *ptr;
4314 MemoryRegionSection section;
4316 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4318 if (!memory_region_is_ram(section.mr) || section.readonly) {
4319 if (memory_region_is_ram(section.mr)) {
4320 io_index = io_mem_rom.ram_addr;
4321 } else {
4322 io_index = memory_region_get_ram_addr(section.mr)
4323 & (IO_MEM_NB_ENTRIES - 1);
4324 }
4325 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4326 #ifdef TARGET_WORDS_BIGENDIAN
4327 io_mem_write(io_index, addr, val >> 32, 4);
4328 io_mem_write(io_index, addr + 4, (uint32_t)val, 4);
4329 #else
4330 io_mem_write(io_index, addr, (uint32_t)val, 4);
4331 io_mem_write(io_index, addr + 4, val >> 32, 4);
4332 #endif
4333 } else {
4334 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4335 & TARGET_PAGE_MASK)
4336 + section.offset_within_region)
4337 + (addr & ~TARGET_PAGE_MASK);
4338 stq_p(ptr, val);
4339 }
4340 }
4342 /* warning: addr must be aligned */
4343 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4344 enum device_endian endian)
4345 {
4346 int io_index;
4347 uint8_t *ptr;
4348 MemoryRegionSection section;
4350 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4352 if (!memory_region_is_ram(section.mr) || section.readonly) {
4353 if (memory_region_is_ram(section.mr)) {
4354 io_index = io_mem_rom.ram_addr;
4355 } else {
4356 io_index = memory_region_get_ram_addr(section.mr)
4357 & (IO_MEM_NB_ENTRIES - 1);
4358 }
4359 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4360 #if defined(TARGET_WORDS_BIGENDIAN)
4361 if (endian == DEVICE_LITTLE_ENDIAN) {
4362 val = bswap32(val);
4363 }
4364 #else
4365 if (endian == DEVICE_BIG_ENDIAN) {
4366 val = bswap32(val);
4367 }
4368 #endif
4369 io_mem_write(io_index, addr, val, 4);
4370 } else {
4371 unsigned long addr1;
4372 addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
4373 + section.offset_within_region
4374 + (addr & ~TARGET_PAGE_MASK);
4375 /* RAM case */
4376 ptr = qemu_get_ram_ptr(addr1);
4377 switch (endian) {
4378 case DEVICE_LITTLE_ENDIAN:
4379 stl_le_p(ptr, val);
4380 break;
4381 case DEVICE_BIG_ENDIAN:
4382 stl_be_p(ptr, val);
4383 break;
4384 default:
4385 stl_p(ptr, val);
4386 break;
4387 }
4388 if (!cpu_physical_memory_is_dirty(addr1)) {
4389 /* invalidate code */
4390 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4391 /* set dirty bit */
4392 cpu_physical_memory_set_dirty_flags(addr1,
4393 (0xff & ~CODE_DIRTY_FLAG));
4394 }
4395 }
4396 }
4398 void stl_phys(target_phys_addr_t addr, uint32_t val)
4399 {
4400 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4401 }
4403 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4404 {
4405 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4406 }
4408 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4409 {
4410 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4411 }
4413 /* XXX: optimize */
4414 void stb_phys(target_phys_addr_t addr, uint32_t val)
4415 {
4416 uint8_t v = val;
4417 cpu_physical_memory_write(addr, &v, 1);
4418 }
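/* Illustrative sketch, not part of the original file: ordinary stores such as
   stl_le_phys() and stb_phys() take the RAM path above, which invalidates any
   translated code on the page and sets the dirty flags, so callers need no
   extra bookkeeping.  The register block written here is invented. */
static inline void example_store_status(target_phys_addr_t regs_addr)
{
    stl_le_phys(regs_addr + 0, 0x1);   /* hypothetical 32-bit LE status word */
    stb_phys(regs_addr + 4, 0xff);     /* hypothetical byte-wide flag */
}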
4420 /* warning: addr must be aligned */
4421 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4422 enum device_endian endian)
4423 {
4424 int io_index;
4425 uint8_t *ptr;
4426 MemoryRegionSection section;
4428 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4430 if (!memory_region_is_ram(section.mr) || section.readonly) {
4431 if (memory_region_is_ram(section.mr)) {
4432 io_index = io_mem_rom.ram_addr;
4433 } else {
4434 io_index = memory_region_get_ram_addr(section.mr)
4435 & (IO_MEM_NB_ENTRIES - 1);
4436 }
4437 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4438 #if defined(TARGET_WORDS_BIGENDIAN)
4439 if (endian == DEVICE_LITTLE_ENDIAN) {
4440 val = bswap16(val);
4441 }
4442 #else
4443 if (endian == DEVICE_BIG_ENDIAN) {
4444 val = bswap16(val);
4445 }
4446 #endif
4447 io_mem_write(io_index, addr, val, 2);
4448 } else {
4449 unsigned long addr1;
4450 addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
4451 + section.offset_within_region + (addr & ~TARGET_PAGE_MASK);
4452 /* RAM case */
4453 ptr = qemu_get_ram_ptr(addr1);
4454 switch (endian) {
4455 case DEVICE_LITTLE_ENDIAN:
4456 stw_le_p(ptr, val);
4457 break;
4458 case DEVICE_BIG_ENDIAN:
4459 stw_be_p(ptr, val);
4460 break;
4461 default:
4462 stw_p(ptr, val);
4463 break;
4464 }
4465 if (!cpu_physical_memory_is_dirty(addr1)) {
4466 /* invalidate code */
4467 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4468 /* set dirty bit */
4469 cpu_physical_memory_set_dirty_flags(addr1,
4470 (0xff & ~CODE_DIRTY_FLAG));
4471 }
4472 }
4473 }
4475 void stw_phys(target_phys_addr_t addr, uint32_t val)
4476 {
4477 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4478 }
4480 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4481 {
4482 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4483 }
4485 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4486 {
4487 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4488 }
4490 /* XXX: optimize */
4491 void stq_phys(target_phys_addr_t addr, uint64_t val)
4492 {
4493 val = tswap64(val);
4494 cpu_physical_memory_write(addr, &val, 8);
4495 }
4497 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4498 {
4499 val = cpu_to_le64(val);
4500 cpu_physical_memory_write(addr, &val, 8);
4501 }
4503 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4504 {
4505 val = cpu_to_be64(val);
4506 cpu_physical_memory_write(addr, &val, 8);
4507 }
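/* Illustrative sketch, not part of the original file: stq_phys() swaps with
   tswap64() and therefore stores in the target's native order, while
   stq_le_phys()/stq_be_phys() fix the in-memory order explicitly. */
static inline void example_store_u64_le(target_phys_addr_t addr, uint64_t v)
{
    stq_le_phys(addr, v);   /* always little-endian in guest memory */
}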
4509 /* virtual memory access for debug (includes writing to ROM) */
4510 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4511 uint8_t *buf, int len, int is_write)
4512 {
4513 int l;
4514 target_phys_addr_t phys_addr;
4515 target_ulong page;
4517 while (len > 0) {
4518 page = addr & TARGET_PAGE_MASK;
4519 phys_addr = cpu_get_phys_page_debug(env, page);
4520 /* if no physical page mapped, return an error */
4521 if (phys_addr == -1)
4522 return -1;
4523 l = (page + TARGET_PAGE_SIZE) - addr;
4524 if (l > len)
4525 l = len;
4526 phys_addr += (addr & ~TARGET_PAGE_MASK);
4527 if (is_write)
4528 cpu_physical_memory_write_rom(phys_addr, buf, l);
4529 else
4530 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4531 len -= l;
4532 buf += l;
4533 addr += l;
4534 }
4535 return 0;
4536 }
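/* Illustrative sketch, not part of the original file: a debugger front end
   reads guest-virtual memory with cpu_memory_rw_debug(), which translates
   page by page and, for writes, goes through the ROM-capable path. */
static inline int example_debug_read(CPUState *env, target_ulong vaddr,
                                     uint8_t *buf, int len)
{
    return cpu_memory_rw_debug(env, vaddr, buf, len, 0 /* is_write */);
}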
4537 #endif
4539 /* in deterministic execution mode, instructions doing device I/Os
4540 must be at the end of the TB */
4541 void cpu_io_recompile(CPUState *env, void *retaddr)
4542 {
4543 TranslationBlock *tb;
4544 uint32_t n, cflags;
4545 target_ulong pc, cs_base;
4546 uint64_t flags;
4548 tb = tb_find_pc((unsigned long)retaddr);
4549 if (!tb) {
4550 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4551 retaddr);
4552 }
4553 n = env->icount_decr.u16.low + tb->icount;
4554 cpu_restore_state(tb, env, (unsigned long)retaddr);
4555 /* Calculate how many instructions had been executed before the fault
4556 occurred. */
4557 n = n - env->icount_decr.u16.low;
4558 /* Generate a new TB ending on the I/O insn. */
4559 n++;
4560 /* On MIPS and SH, delay slot instructions can only be restarted if
4561 they were already the first instruction in the TB. If this is not
4562 the first instruction in a TB then re-execute the preceding
4563 branch. */
4564 #if defined(TARGET_MIPS)
4565 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4566 env->active_tc.PC -= 4;
4567 env->icount_decr.u16.low++;
4568 env->hflags &= ~MIPS_HFLAG_BMASK;
4569 }
4570 #elif defined(TARGET_SH4)
4571 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4572 && n > 1) {
4573 env->pc -= 2;
4574 env->icount_decr.u16.low++;
4575 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4576 }
4577 #endif
4578 /* This should never happen. */
4579 if (n > CF_COUNT_MASK)
4580 cpu_abort(env, "TB too big during recompile");
4582 cflags = n | CF_LAST_IO;
4583 pc = tb->pc;
4584 cs_base = tb->cs_base;
4585 flags = tb->flags;
4586 tb_phys_invalidate(tb, -1);
4587 /* FIXME: In theory this could raise an exception. In practice
4588 we have already translated the block once so it's probably ok. */
4589 tb_gen_code(env, pc, cs_base, flags, cflags);
4590 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4591 the first in the TB) then we end up generating a whole new TB and
4592 repeating the fault, which is horribly inefficient.
4593 Better would be to execute just this insn uncached, or generate a
4594 second new TB. */
4595 cpu_resume_from_signal(env, NULL);
4596 }
4598 #if !defined(CONFIG_USER_ONLY)
4600 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4601 {
4602 int i, target_code_size, max_target_code_size;
4603 int direct_jmp_count, direct_jmp2_count, cross_page;
4604 TranslationBlock *tb;
4606 target_code_size = 0;
4607 max_target_code_size = 0;
4608 cross_page = 0;
4609 direct_jmp_count = 0;
4610 direct_jmp2_count = 0;
4611 for(i = 0; i < nb_tbs; i++) {
4612 tb = &tbs[i];
4613 target_code_size += tb->size;
4614 if (tb->size > max_target_code_size)
4615 max_target_code_size = tb->size;
4616 if (tb->page_addr[1] != -1)
4617 cross_page++;
4618 if (tb->tb_next_offset[0] != 0xffff) {
4619 direct_jmp_count++;
4620 if (tb->tb_next_offset[1] != 0xffff) {
4621 direct_jmp2_count++;
4622 }
4623 }
4624 }
4625 /* XXX: avoid using doubles ? */
4626 cpu_fprintf(f, "Translation buffer state:\n");
4627 cpu_fprintf(f, "gen code size %td/%ld\n",
4628 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4629 cpu_fprintf(f, "TB count %d/%d\n",
4630 nb_tbs, code_gen_max_blocks);
4631 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4632 nb_tbs ? target_code_size / nb_tbs : 0,
4633 max_target_code_size);
4634 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4635 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4636 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4637 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4638 cross_page,
4639 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4640 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4641 direct_jmp_count,
4642 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4643 direct_jmp2_count,
4644 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4645 cpu_fprintf(f, "\nStatistics:\n");
4646 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4647 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4648 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4649 tcg_dump_info(f, cpu_fprintf);
4650 }
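/* Illustrative sketch, not part of the original file: these statistics are
   normally requested through the monitor, but any (FILE *, fprintf-like)
   pair can be passed, e.g. to dump them to stderr: */
static inline void example_dump_exec_info_stderr(void)
{
    dump_exec_info(stderr, fprintf);
}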
4652 /* NOTE: this function can trigger an exception */
4653 /* NOTE2: the returned address is not exactly the physical address: it
4654 is the offset relative to phys_ram_base */
4655 tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
4656 {
4657 int mmu_idx, page_index, pd;
4658 void *p;
4660 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
4661 mmu_idx = cpu_mmu_index(env1);
4662 if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
4663 (addr & TARGET_PAGE_MASK))) {
4664 ldub_code(addr);
4665 }
4666 pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
4667 if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
4668 && !io_mem_region[pd]->rom_device) {
4669 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
4670 cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
4671 #else
4672 cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
4673 #endif
4674 }
4675 p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
4676 return qemu_ram_addr_from_host_nofail(p);
4677 }
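/* Illustrative sketch, not part of the original file: the translator uses this
   helper to key translation blocks by the page containing their code, roughly
   as in the call below. */
static inline tb_page_addr_t example_code_page_for_pc(CPUState *env,
                                                      target_ulong pc)
{
    return get_page_addr_code(env, pc);
}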
4679 /*
4680 * A helper function for the _utterly broken_ virtio device model to find out if
4681 * it's running on a big endian machine. Don't do this at home kids!
4682 */
4683 bool virtio_is_big_endian(void);
4684 bool virtio_is_big_endian(void)
4685 {
4686 #if defined(TARGET_WORDS_BIGENDIAN)
4687 return true;
4688 #else
4689 return false;
4690 #endif
4691 }
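/* Illustrative sketch, not part of the original file: a caller might use
   virtio_is_big_endian() to decide whether a value needs byte-swapping before
   being handed to code that assumes little-endian layout; the helper below is
   hypothetical and not how the real virtio code is structured. */
static inline uint16_t example_le16_from_target(uint16_t v)
{
    return virtio_is_big_endian() ? bswap16(v) : v;
}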
4693 #define MMUSUFFIX _cmmu
4694 #undef GETPC
4695 #define GETPC() NULL
4696 #define env cpu_single_env
4697 #define SOFTMMU_CODE_ACCESS
4699 #define SHIFT 0
4700 #include "softmmu_template.h"
4702 #define SHIFT 1
4703 #include "softmmu_template.h"
4705 #define SHIFT 2
4706 #include "softmmu_template.h"
4708 #define SHIFT 3
4709 #include "softmmu_template.h"
4711 #undef env
4713 #endif