vfio-pci: Fix BAR->VFIODevice translation in
[qemu.git] / exec.c
blob7899042ce9a3afb49c91fe01ce0ff67a2372fb25
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #include "cputlb.h"
62 #define WANT_EXEC_OBSOLETE
63 #include "exec-obsolete.h"
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32) && !defined(_WIN64)
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 static uint8_t *code_gen_ptr;
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 static int in_migration;
115 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
117 static MemoryRegion *system_memory;
118 static MemoryRegion *system_io;
120 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
121 static MemoryRegion io_mem_subpage_ram;
123 #endif
125 CPUArchState *first_cpu;
126 /* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128 DEFINE_TLS(CPUArchState *,cpu_single_env);
129 /* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132 int use_icount = 0;
134 typedef struct PageDesc {
135 /* list of TBs intersecting this ram page */
136 TranslationBlock *first_tb;
137 /* in order to optimize self modifying code, we count the number
138 of lookups we do to a given page to use a bitmap */
139 unsigned int code_write_count;
140 uint8_t *code_bitmap;
141 #if defined(CONFIG_USER_ONLY)
142 unsigned long flags;
143 #endif
144 } PageDesc;
146 /* In system mode we want L1_MAP to be based on ram offsets,
147 while in user mode we want it to be based on virtual addresses. */
148 #if !defined(CONFIG_USER_ONLY)
149 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
150 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
153 #endif
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
156 #endif
158 /* Size of the L2 (and L3, etc) page tables. */
159 #define L2_BITS 10
160 #define L2_SIZE (1 << L2_BITS)
162 #define P_L2_LEVELS \
163 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
165 /* The bits remaining after N lower levels of page tables. */
166 #define V_L1_BITS_REM \
167 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
169 #if V_L1_BITS_REM < 4
170 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
171 #else
172 #define V_L1_BITS V_L1_BITS_REM
173 #endif
175 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
179 uintptr_t qemu_real_host_page_size;
180 uintptr_t qemu_host_page_size;
181 uintptr_t qemu_host_page_mask;
183 /* This is a multi-level map on the virtual address space.
184 The bottom level has pointers to PageDesc. */
185 static void *l1_map[V_L1_SIZE];
187 #if !defined(CONFIG_USER_ONLY)
188 typedef struct PhysPageEntry PhysPageEntry;
190 static MemoryRegionSection *phys_sections;
191 static unsigned phys_sections_nb, phys_sections_nb_alloc;
192 static uint16_t phys_section_unassigned;
193 static uint16_t phys_section_notdirty;
194 static uint16_t phys_section_rom;
195 static uint16_t phys_section_watch;
/* One 16-bit slot of the physical page map. */
struct PhysPageEntry {
    /* Set when 'ptr' indexes phys_sections, clear when it indexes
       phys_map_nodes. */
    uint16_t is_leaf : 1;
    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};
203 /* Simple allocator for PhysPageEntry nodes */
204 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
205 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
207 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
209 /* This is a multi-level map on the physical address space.
210 The bottom level has pointers to MemoryRegionSections. */
211 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
213 static void io_mem_init(void);
214 static void memory_map_init(void);
216 static MemoryRegion io_mem_watch;
217 #endif
219 /* statistics */
220 static int tb_flush_count;
221 static int tb_phys_invalidate_count;
223 #ifdef _WIN32
224 static void map_exec(void *addr, long size)
226 DWORD old_protect;
227 VirtualProtect(addr, size,
228 PAGE_EXECUTE_READWRITE, &old_protect);
231 #else
/* POSIX variant: make every host page touched by [addr, addr + size)
   readable, writable and executable.  The range is widened to page
   boundaries first; the result of mprotect() is not inspected. */
static void map_exec(void *addr, long size)
{
    const unsigned long page_size = getpagesize();
    const unsigned long page_mask = ~(page_size - 1);
    unsigned long first, last;

    /* Round the start down and the end up to a page boundary. */
    first = (unsigned long)addr & page_mask;
    last = ((unsigned long)addr + size + page_size - 1) & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
247 #endif
/* Initialise the host page size globals (qemu_real_host_page_size,
   qemu_host_page_size, qemu_host_page_mask).  On BSD user-mode builds it
   additionally walks the host process' existing mappings and marks the
   corresponding guest ranges PAGE_RESERVED via page_set_flags(). */
249 static void page_init(void)
251 /* NOTE: we can always suppose that qemu_host_page_size >=
252 TARGET_PAGE_SIZE */
253 #ifdef _WIN32
255 SYSTEM_INFO system_info;
257 GetSystemInfo(&system_info);
258 qemu_real_host_page_size = system_info.dwPageSize;
260 #else
261 qemu_real_host_page_size = getpagesize();
262 #endif
/* A non-zero qemu_host_page_size set before this call is kept, but it is
   never allowed to be smaller than TARGET_PAGE_SIZE. */
263 if (qemu_host_page_size == 0)
264 qemu_host_page_size = qemu_real_host_page_size;
265 if (qemu_host_page_size < TARGET_PAGE_SIZE)
266 qemu_host_page_size = TARGET_PAGE_SIZE;
267 qemu_host_page_mask = ~(qemu_host_page_size - 1);
269 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
271 #ifdef HAVE_KINFO_GETVMMAP
/* Preferred path: take the mapping list from kinfo_getvmmap(). */
272 struct kinfo_vmentry *freep;
273 int i, cnt;
275 freep = kinfo_getvmmap(getpid(), &cnt);
276 if (freep) {
277 mmap_lock();
278 for (i = 0; i < cnt; i++) {
279 unsigned long startaddr, endaddr;
281 startaddr = freep[i].kve_start;
282 endaddr = freep[i].kve_end;
/* Mappings whose start is not a valid guest address are skipped. */
283 if (h2g_valid(startaddr)) {
284 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
286 if (h2g_valid(endaddr)) {
287 endaddr = h2g(endaddr);
288 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
289 } else {
/* End lies outside the guest range: reserve up to ~0ul, but only when the
   preprocessor condition below holds. */
290 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
291 endaddr = ~0ul;
292 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
293 #endif
297 free(freep);
298 mmap_unlock();
300 #else
/* Fallback path: parse the Linux-compat maps file line by line. */
301 FILE *f;
303 last_brk = (unsigned long)sbrk(0);
305 f = fopen("/compat/linux/proc/self/maps", "r");
306 if (f) {
307 mmap_lock();
/* NOTE(review): the loop exits only on feof(f); a line that fscanf()
   cannot match without consuming input would repeat forever - confirm
   the file format guarantees this cannot happen. */
309 do {
310 unsigned long startaddr, endaddr;
311 int n;
313 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
315 if (n == 2 && h2g_valid(startaddr)) {
316 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
318 if (h2g_valid(endaddr)) {
319 endaddr = h2g(endaddr);
320 } else {
321 endaddr = ~0ul;
323 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
325 } while (!feof(f));
327 fclose(f);
328 mmap_unlock();
330 #endif
332 #endif
335 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
337 PageDesc *pd;
338 void **lp;
339 int i;
341 #if defined(CONFIG_USER_ONLY)
342 /* We can't use g_malloc because it may recurse into a locked mutex. */
343 # define ALLOC(P, SIZE) \
344 do { \
345 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
346 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
347 } while (0)
348 #else
349 # define ALLOC(P, SIZE) \
350 do { P = g_malloc0(SIZE); } while (0)
351 #endif
353 /* Level 1. Always allocated. */
354 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
356 /* Level 2..N-1. */
357 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
358 void **p = *lp;
360 if (p == NULL) {
361 if (!alloc) {
362 return NULL;
364 ALLOC(p, sizeof(void *) * L2_SIZE);
365 *lp = p;
368 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
371 pd = *lp;
372 if (pd == NULL) {
373 if (!alloc) {
374 return NULL;
376 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
377 *lp = pd;
380 #undef ALLOC
382 return pd + (index & (L2_SIZE - 1));
385 static inline PageDesc *page_find(tb_page_addr_t index)
387 return page_find_alloc(index, 0);
390 #if !defined(CONFIG_USER_ONLY)
392 static void phys_map_node_reserve(unsigned nodes)
394 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
395 typedef PhysPageEntry Node[L2_SIZE];
396 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
397 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
398 phys_map_nodes_nb + nodes);
399 phys_map_nodes = g_renew(Node, phys_map_nodes,
400 phys_map_nodes_nb_alloc);
404 static uint16_t phys_map_node_alloc(void)
406 unsigned i;
407 uint16_t ret;
409 ret = phys_map_nodes_nb++;
410 assert(ret != PHYS_MAP_NODE_NIL);
411 assert(ret != phys_map_nodes_nb_alloc);
412 for (i = 0; i < L2_SIZE; ++i) {
413 phys_map_nodes[ret][i].is_leaf = 0;
414 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
416 return ret;
419 static void phys_map_nodes_reset(void)
421 phys_map_nodes_nb = 0;
425 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
426 target_phys_addr_t *nb, uint16_t leaf,
427 int level)
429 PhysPageEntry *p;
430 int i;
431 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
433 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
434 lp->ptr = phys_map_node_alloc();
435 p = phys_map_nodes[lp->ptr];
436 if (level == 0) {
437 for (i = 0; i < L2_SIZE; i++) {
438 p[i].is_leaf = 1;
439 p[i].ptr = phys_section_unassigned;
442 } else {
443 p = phys_map_nodes[lp->ptr];
445 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
447 while (*nb && lp < &p[L2_SIZE]) {
448 if ((*index & (step - 1)) == 0 && *nb >= step) {
449 lp->is_leaf = true;
450 lp->ptr = leaf;
451 *index += step;
452 *nb -= step;
453 } else {
454 phys_page_set_level(lp, index, nb, leaf, level - 1);
456 ++lp;
460 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
461 uint16_t leaf)
463 /* Wildly overreserve - it doesn't matter much. */
464 phys_map_node_reserve(3 * P_L2_LEVELS);
466 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
469 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
471 PhysPageEntry lp = phys_map;
472 PhysPageEntry *p;
473 int i;
474 uint16_t s_index = phys_section_unassigned;
476 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
477 if (lp.ptr == PHYS_MAP_NODE_NIL) {
478 goto not_found;
480 p = phys_map_nodes[lp.ptr];
481 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
484 s_index = lp.ptr;
485 not_found:
486 return &phys_sections[s_index];
489 bool memory_region_is_unassigned(MemoryRegion *mr)
491 return mr != &io_mem_ram && mr != &io_mem_rom
492 && mr != &io_mem_notdirty && !mr->rom_device
493 && mr != &io_mem_watch;
496 #define mmap_lock() do { } while(0)
497 #define mmap_unlock() do { } while(0)
498 #endif
500 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
502 #if defined(CONFIG_USER_ONLY)
503 /* Currently it is not recommended to allocate big chunks of data in
504 user mode. It will change when a dedicated libc will be used */
505 #define USE_STATIC_CODE_GEN_BUFFER
506 #endif
508 #ifdef USE_STATIC_CODE_GEN_BUFFER
509 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
510 __attribute__((aligned (CODE_GEN_ALIGN)));
511 #endif
/* Allocate the buffer that receives translated code and derive the limits
   that depend on its size (code_gen_buffer_max_size, code_gen_max_blocks,
   the tbs array).  'tb_size' is the requested size in bytes; zero selects
   a default.  With USE_STATIC_CODE_GEN_BUFFER the static array is used and
   'tb_size' is ignored.  On mmap() failure the process exits. */
513 static void code_gen_alloc(unsigned long tb_size)
515 #ifdef USE_STATIC_CODE_GEN_BUFFER
516 code_gen_buffer = static_code_gen_buffer;
517 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
518 map_exec(code_gen_buffer, code_gen_buffer_size);
519 #else
520 code_gen_buffer_size = tb_size;
521 if (code_gen_buffer_size == 0) {
522 #if defined(CONFIG_USER_ONLY)
523 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
524 #else
525 /* XXX: needs adjustments */
526 code_gen_buffer_size = (unsigned long)(ram_size / 4);
527 #endif
/* Enforce the lower bound regardless of how the size was chosen. */
529 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
530 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
531 /* The code gen buffer location may have constraints depending on
532 the host cpu and OS */
533 #if defined(__linux__)
535 int flags;
536 void *start = NULL;
538 flags = MAP_PRIVATE | MAP_ANONYMOUS;
539 #if defined(__x86_64__)
540 flags |= MAP_32BIT;
541 /* Cannot map more than that */
542 if (code_gen_buffer_size > (800 * 1024 * 1024))
543 code_gen_buffer_size = (800 * 1024 * 1024);
544 #elif defined(__sparc__) && HOST_LONG_BITS == 64
545 // Map the buffer below 2G, so we can use direct calls and branches
546 start = (void *) 0x40000000UL;
547 if (code_gen_buffer_size > (512 * 1024 * 1024))
548 code_gen_buffer_size = (512 * 1024 * 1024);
549 #elif defined(__arm__)
550 /* Keep the buffer no bigger than 16MB to branch between blocks */
551 if (code_gen_buffer_size > 16 * 1024 * 1024)
552 code_gen_buffer_size = 16 * 1024 * 1024;
553 #elif defined(__s390x__)
554 /* Map the buffer so that we can use direct calls and branches. */
555 /* We have a +- 4GB range on the branches; leave some slop. */
556 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
557 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
559 start = (void *)0x90000000UL;
560 #endif
/* 'start' is passed without MAP_FIXED here, i.e. only as a placement hint. */
561 code_gen_buffer = mmap(start, code_gen_buffer_size,
562 PROT_WRITE | PROT_READ | PROT_EXEC,
563 flags, -1, 0);
564 if (code_gen_buffer == MAP_FAILED) {
565 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
566 exit(1);
569 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
570 || defined(__DragonFly__) || defined(__OpenBSD__) \
571 || defined(__NetBSD__)
573 int flags;
574 void *addr = NULL;
575 flags = MAP_PRIVATE | MAP_ANONYMOUS;
576 #if defined(__x86_64__)
577 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
578 * 0x40000000 is free */
579 flags |= MAP_FIXED;
580 addr = (void *)0x40000000;
581 /* Cannot map more than that */
582 if (code_gen_buffer_size > (800 * 1024 * 1024))
583 code_gen_buffer_size = (800 * 1024 * 1024);
584 #elif defined(__sparc__) && HOST_LONG_BITS == 64
585 // Map the buffer below 2G, so we can use direct calls and branches
586 addr = (void *) 0x40000000UL;
587 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
588 code_gen_buffer_size = (512 * 1024 * 1024);
590 #endif
591 code_gen_buffer = mmap(addr, code_gen_buffer_size,
592 PROT_WRITE | PROT_READ | PROT_EXEC,
593 flags, -1, 0);
594 if (code_gen_buffer == MAP_FAILED) {
595 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
596 exit(1);
599 #else
/* Other hosts: heap allocation, then made executable via map_exec(). */
600 code_gen_buffer = g_malloc(code_gen_buffer_size);
601 map_exec(code_gen_buffer, code_gen_buffer_size);
602 #endif
603 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
604 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
/* The usable limit leaves TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes of
   headroom at the end of the buffer. */
605 code_gen_buffer_max_size = code_gen_buffer_size -
606 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
607 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
608 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
611 /* Must be called before using the QEMU cpus. 'tb_size' is the size
612 (in bytes) allocated to the translation buffer. Zero means default
613 size. */
614 void tcg_exec_init(unsigned long tb_size)
616 cpu_gen_init();
617 code_gen_alloc(tb_size);
618 code_gen_ptr = code_gen_buffer;
619 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
620 page_init();
621 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
622 /* There's no guest base to take into account, so go ahead and
623 initialize the prologue now. */
624 tcg_prologue_init(&tcg_ctx);
625 #endif
628 bool tcg_enabled(void)
630 return code_gen_buffer != NULL;
/* Global (not per-CPU) initialisation.  In system-mode builds this sets
   up the memory map and then the I/O memory regions; in user-mode builds
   it does nothing. */
void cpu_exec_init_all(void)
{
#ifndef CONFIG_USER_ONLY
    memory_map_init();
    io_mem_init();
#endif
}
641 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
643 static int cpu_common_post_load(void *opaque, int version_id)
645 CPUArchState *env = opaque;
647 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
648 version_id is increased. */
649 env->interrupt_request &= ~0x01;
650 tlb_flush(env, 1);
652 return 0;
655 static const VMStateDescription vmstate_cpu_common = {
656 .name = "cpu_common",
657 .version_id = 1,
658 .minimum_version_id = 1,
659 .minimum_version_id_old = 1,
660 .post_load = cpu_common_post_load,
661 .fields = (VMStateField []) {
662 VMSTATE_UINT32(halted, CPUArchState),
663 VMSTATE_UINT32(interrupt_request, CPUArchState),
664 VMSTATE_END_OF_LIST()
667 #endif
669 CPUArchState *qemu_get_cpu(int cpu)
671 CPUArchState *env = first_cpu;
673 while (env) {
674 if (env->cpu_index == cpu)
675 break;
676 env = env->next_cpu;
679 return env;
682 void cpu_exec_init(CPUArchState *env)
684 CPUArchState **penv;
685 int cpu_index;
687 #if defined(CONFIG_USER_ONLY)
688 cpu_list_lock();
689 #endif
690 env->next_cpu = NULL;
691 penv = &first_cpu;
692 cpu_index = 0;
693 while (*penv != NULL) {
694 penv = &(*penv)->next_cpu;
695 cpu_index++;
697 env->cpu_index = cpu_index;
698 env->numa_node = 0;
699 QTAILQ_INIT(&env->breakpoints);
700 QTAILQ_INIT(&env->watchpoints);
701 #ifndef CONFIG_USER_ONLY
702 env->thread_id = qemu_get_thread_id();
703 #endif
704 *penv = env;
705 #if defined(CONFIG_USER_ONLY)
706 cpu_list_unlock();
707 #endif
708 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
709 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
710 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
711 cpu_save, cpu_load, env);
712 #endif
715 /* Allocate a new translation block. Flush the translation buffer if
716 too many translation blocks or too much generated code. */
717 static TranslationBlock *tb_alloc(target_ulong pc)
719 TranslationBlock *tb;
721 if (nb_tbs >= code_gen_max_blocks ||
722 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
723 return NULL;
724 tb = &tbs[nb_tbs++];
725 tb->pc = pc;
726 tb->cflags = 0;
727 return tb;
730 void tb_free(TranslationBlock *tb)
732 /* In practice this is mostly used for single use temporary TB
733 Ignore the hard cases and just back up if this TB happens to
734 be the last one generated. */
735 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
736 code_gen_ptr = tb->tc_ptr;
737 nb_tbs--;
741 static inline void invalidate_page_bitmap(PageDesc *p)
743 if (p->code_bitmap) {
744 g_free(p->code_bitmap);
745 p->code_bitmap = NULL;
747 p->code_write_count = 0;
750 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
752 static void page_flush_tb_1 (int level, void **lp)
754 int i;
756 if (*lp == NULL) {
757 return;
759 if (level == 0) {
760 PageDesc *pd = *lp;
761 for (i = 0; i < L2_SIZE; ++i) {
762 pd[i].first_tb = NULL;
763 invalidate_page_bitmap(pd + i);
765 } else {
766 void **pp = *lp;
767 for (i = 0; i < L2_SIZE; ++i) {
768 page_flush_tb_1 (level - 1, pp + i);
773 static void page_flush_tb(void)
775 int i;
776 for (i = 0; i < V_L1_SIZE; i++) {
777 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
781 /* flush all the translation blocks */
782 /* XXX: tb_flush is currently not thread safe */
783 void tb_flush(CPUArchState *env1)
785 CPUArchState *env;
786 #if defined(DEBUG_FLUSH)
787 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
788 (unsigned long)(code_gen_ptr - code_gen_buffer),
789 nb_tbs, nb_tbs > 0 ?
790 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
791 #endif
792 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
793 cpu_abort(env1, "Internal error: code buffer overflow\n");
795 nb_tbs = 0;
797 for(env = first_cpu; env != NULL; env = env->next_cpu) {
798 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
801 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
802 page_flush_tb();
804 code_gen_ptr = code_gen_buffer;
805 /* XXX: flush processor icache at this point if cache flush is
806 expensive */
807 tb_flush_count++;
810 #ifdef DEBUG_TB_CHECK
812 static void tb_invalidate_check(target_ulong address)
814 TranslationBlock *tb;
815 int i;
816 address &= TARGET_PAGE_MASK;
817 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
818 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
819 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
820 address >= tb->pc + tb->size)) {
821 printf("ERROR invalidate: address=" TARGET_FMT_lx
822 " PC=%08lx size=%04x\n",
823 address, (long)tb->pc, tb->size);
829 /* verify that all the pages have correct rights for code */
830 static void tb_page_check(void)
832 TranslationBlock *tb;
833 int i, flags1, flags2;
835 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
836 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
837 flags1 = page_get_flags(tb->pc);
838 flags2 = page_get_flags(tb->pc + tb->size - 1);
839 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
840 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
841 (long)tb->pc, tb->size, flags1, flags2);
847 #endif
849 /* invalidate one TB */
850 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
851 int next_offset)
853 TranslationBlock *tb1;
854 for(;;) {
855 tb1 = *ptb;
856 if (tb1 == tb) {
857 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
858 break;
860 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
864 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
866 TranslationBlock *tb1;
867 unsigned int n1;
869 for(;;) {
870 tb1 = *ptb;
871 n1 = (uintptr_t)tb1 & 3;
872 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
873 if (tb1 == tb) {
874 *ptb = tb1->page_next[n1];
875 break;
877 ptb = &tb1->page_next[n1];
/* Remove outgoing jump 'n' of 'tb' from the circular list it is chained
   on, then clear tb->jmp_next[n].  Nothing happens when jmp_next[n] is
   already NULL.  List links carry a tag in their two low bits: the value
   compared against 'n' below, or 2 for a link that continues through the
   pointed-to TB's jmp_first field. */
881 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
883 TranslationBlock *tb1, **ptb;
884 unsigned int n1;
886 ptb = &tb->jmp_next[n];
887 tb1 = *ptb;
888 if (tb1) {
889 /* find tb(n) in circular list */
890 for(;;) {
891 tb1 = *ptb;
/* Split the link into its tag (low two bits) and the TB pointer. */
892 n1 = (uintptr_t)tb1 & 3;
893 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
/* Stop once 'ptb' is the link that refers back to (tb, n). */
894 if (n1 == n && tb1 == tb)
895 break;
896 if (n1 == 2) {
897 ptb = &tb1->jmp_first;
898 } else {
899 ptb = &tb1->jmp_next[n1];
902 /* now we can suppress tb(n) from the list */
903 *ptb = tb->jmp_next[n];
905 tb->jmp_next[n] = NULL;
909 /* reset the jump entry 'n' of a TB so that it is not chained to
910 another TB */
911 static inline void tb_reset_jump(TranslationBlock *tb, int n)
913 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
/* Invalidate one TB: remove it from the physical hash table, from the TB
   lists of the page(s) it covers, from every CPU's tb_jmp_cache and from
   the jump chaining lists, and un-chain any TB that jumps to it.
   'page_addr' names a page whose list must NOT be touched (the callers in
   this file pass -1, so both pages are processed). */
916 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
918 CPUArchState *env;
919 PageDesc *p;
920 unsigned int h, n1;
921 tb_page_addr_t phys_pc;
922 TranslationBlock *tb1, *tb2;
924 /* remove the TB from the hash list */
925 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
926 h = tb_phys_hash_func(phys_pc);
927 tb_remove(&tb_phys_hash[h], tb,
928 offsetof(TranslationBlock, phys_hash_next));
930 /* remove the TB from the page list */
/* NOTE(review): page_find() can return NULL; the code relies on the page
   of a live TB always having a PageDesc - confirm. */
931 if (tb->page_addr[0] != page_addr) {
932 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
933 tb_page_remove(&p->first_tb, tb);
934 invalidate_page_bitmap(p);
/* page_addr[1] == -1 means the TB does not extend onto a second page. */
936 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
937 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
938 tb_page_remove(&p->first_tb, tb);
939 invalidate_page_bitmap(p);
942 tb_invalidated_flag = 1;
944 /* remove the TB from each CPU's tb_jmp_cache */
945 h = tb_jmp_cache_hash_func(tb->pc);
946 for(env = first_cpu; env != NULL; env = env->next_cpu) {
947 if (env->tb_jmp_cache[h] == tb)
948 env->tb_jmp_cache[h] = NULL;
951 /* suppress this TB from the two jump lists */
952 tb_jmp_remove(tb, 0);
953 tb_jmp_remove(tb, 1);
955 /* suppress any remaining jumps to this TB */
/* Walk the list headed by jmp_first; a link tagged 2 marks its end. */
956 tb1 = tb->jmp_first;
957 for(;;) {
958 n1 = (uintptr_t)tb1 & 3;
959 if (n1 == 2)
960 break;
961 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
962 tb2 = tb1->jmp_next[n1];
963 tb_reset_jump(tb1, n1);
964 tb1->jmp_next[n1] = NULL;
965 tb1 = tb2;
967 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
969 tb_phys_invalidate_count++;
/* Set 'len' consecutive bits of the bitmap 'tab', starting at bit index
 * 'start'.  Bit i lives in byte i / 8 at position i % 8 (least
 * significant bit first).
 *
 * tab    bitmap; must be large enough to hold bit start + len - 1
 * start  index of the first bit to set (non-negative)
 * len    number of bits to set; zero or a negative value sets nothing
 *
 * A negative 'len' used to fall into the multi-byte path and set bits the
 * caller never asked for; it is now rejected up front.
 */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    if (len <= 0) {
        return;
    }

    end = start + len;
    tab += start >> 3;
    /* Bits from 'start' to the top of its byte. */
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* The whole range sits inside one byte: also trim the top. */
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        /* Leading partial byte. */
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        /* Whole bytes in the middle. */
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        /* Trailing partial byte, if the range does not end on a byte
           boundary. */
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
999 static void build_page_bitmap(PageDesc *p)
1001 int n, tb_start, tb_end;
1002 TranslationBlock *tb;
1004 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1006 tb = p->first_tb;
1007 while (tb != NULL) {
1008 n = (uintptr_t)tb & 3;
1009 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1010 /* NOTE: this is subtle as a TB may span two physical pages */
1011 if (n == 0) {
1012 /* NOTE: tb_end may be after the end of the page, but
1013 it is not a problem */
1014 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1015 tb_end = tb_start + tb->size;
1016 if (tb_end > TARGET_PAGE_SIZE)
1017 tb_end = TARGET_PAGE_SIZE;
1018 } else {
1019 tb_start = 0;
1020 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1022 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1023 tb = tb->page_next[n];
1027 TranslationBlock *tb_gen_code(CPUArchState *env,
1028 target_ulong pc, target_ulong cs_base,
1029 int flags, int cflags)
1031 TranslationBlock *tb;
1032 uint8_t *tc_ptr;
1033 tb_page_addr_t phys_pc, phys_page2;
1034 target_ulong virt_page2;
1035 int code_gen_size;
1037 phys_pc = get_page_addr_code(env, pc);
1038 tb = tb_alloc(pc);
1039 if (!tb) {
1040 /* flush must be done */
1041 tb_flush(env);
1042 /* cannot fail at this point */
1043 tb = tb_alloc(pc);
1044 /* Don't forget to invalidate previous TB info. */
1045 tb_invalidated_flag = 1;
1047 tc_ptr = code_gen_ptr;
1048 tb->tc_ptr = tc_ptr;
1049 tb->cs_base = cs_base;
1050 tb->flags = flags;
1051 tb->cflags = cflags;
1052 cpu_gen_code(env, tb, &code_gen_size);
1053 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1054 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1056 /* check next page if needed */
1057 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1058 phys_page2 = -1;
1059 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1060 phys_page2 = get_page_addr_code(env, virt_page2);
1062 tb_link_page(tb, phys_pc, phys_page2);
1063 return tb;
1067 * Invalidate all TBs which intersect with the target physical address range
1068 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1069 * 'is_cpu_write_access' should be true if called from a real cpu write
1070 * access: the virtual CPU will exit the current TB if code is modified inside
1071 * this TB.
1073 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1074 int is_cpu_write_access)
1076 while (start < end) {
1077 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1078 start &= TARGET_PAGE_MASK;
1079 start += TARGET_PAGE_SIZE;
/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
/* Invalidate the TBs listed on the page containing 'start' that overlap
   [start, end).  Returns at once when that page has no PageDesc.
   'is_cpu_write_access' is non-zero when called for a real CPU write; it
   enables the code-bitmap heuristic, the TLB unprotect step and (with
   TARGET_HAS_PRECISE_SMC) the handling of a TB that modifies itself. */
1090 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1091 int is_cpu_write_access)
1093 TranslationBlock *tb, *tb_next, *saved_tb;
1094 CPUArchState *env = cpu_single_env;
1095 tb_page_addr_t tb_start, tb_end;
1096 PageDesc *p;
1097 int n;
1098 #ifdef TARGET_HAS_PRECISE_SMC
1099 int current_tb_not_found = is_cpu_write_access;
1100 TranslationBlock *current_tb = NULL;
1101 int current_tb_modified = 0;
1102 target_ulong current_pc = 0;
1103 target_ulong current_cs_base = 0;
1104 int current_flags = 0;
1105 #endif /* TARGET_HAS_PRECISE_SMC */
1107 p = page_find(start >> TARGET_PAGE_BITS);
1108 if (!p)
1109 return;
/* The write counter is bumped on every call that finds no bitmap; the
   bitmap itself is only built for CPU write accesses once the counter
   reaches SMC_BITMAP_USE_THRESHOLD. */
1110 if (!p->code_bitmap &&
1111 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1112 is_cpu_write_access) {
1113 /* build code bitmap */
1114 build_page_bitmap(p);
1117 /* we remove all the TBs in the range [start, end[ */
1118 /* XXX: see if in some cases it could be faster to invalidate all the code */
1119 tb = p->first_tb;
1120 while (tb != NULL) {
/* Low two bits of the link select which of the TB's pages this is. */
1121 n = (uintptr_t)tb & 3;
1122 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
/* Fetch the successor first: 'tb' may be unlinked below. */
1123 tb_next = tb->page_next[n];
1124 /* NOTE: this is subtle as a TB may span two physical pages */
1125 if (n == 0) {
1126 /* NOTE: tb_end may be after the end of the page, but
1127 it is not a problem */
1128 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1129 tb_end = tb_start + tb->size;
1130 } else {
1131 tb_start = tb->page_addr[1];
1132 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
/* Overlap test between [tb_start, tb_end) and [start, end). */
1134 if (!(tb_end <= start || tb_start >= end)) {
1135 #ifdef TARGET_HAS_PRECISE_SMC
1136 if (current_tb_not_found) {
1137 current_tb_not_found = 0;
1138 current_tb = NULL;
1139 if (env->mem_io_pc) {
1140 /* now we have a real cpu fault */
1141 current_tb = tb_find_pc(env->mem_io_pc);
1144 if (current_tb == tb &&
1145 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1146 /* If we are modifying the current TB, we must stop
1147 its execution. We could be more precise by checking
1148 that the modification is after the current PC, but it
1149 would require a specialized function to partially
1150 restore the CPU state */
1152 current_tb_modified = 1;
1153 cpu_restore_state(current_tb, env, env->mem_io_pc);
1154 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1155 &current_flags);
1157 #endif /* TARGET_HAS_PRECISE_SMC */
1158 /* we need to do that to handle the case where a signal
1159 occurs while doing tb_phys_invalidate() */
1160 saved_tb = NULL;
1161 if (env) {
1162 saved_tb = env->current_tb;
1163 env->current_tb = NULL;
1165 tb_phys_invalidate(tb, -1);
1166 if (env) {
1167 env->current_tb = saved_tb;
/* Re-raise interrupts that were already pending in interrupt_request. */
1168 if (env->interrupt_request && env->current_tb)
1169 cpu_interrupt(env, env->interrupt_request);
1172 tb = tb_next;
1174 #if !defined(CONFIG_USER_ONLY)
1175 /* if no code remaining, no need to continue to use slow writes */
1176 if (!p->first_tb) {
1177 invalidate_page_bitmap(p);
/* NOTE(review): 'env' is dereferenced here without a NULL check; this
   assumes a CPU write access always has cpu_single_env set - confirm. */
1178 if (is_cpu_write_access) {
1179 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1182 #endif
1183 #ifdef TARGET_HAS_PRECISE_SMC
1184 if (current_tb_modified) {
1185 /* we generate a block containing just the instruction
1186 modifying the memory. It will ensure that it cannot modify
1187 itself */
1188 env->current_tb = NULL;
1189 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1190 cpu_resume_from_signal(env, NULL);
1192 #endif
1195 /* len must be <= 8 and start must be a multiple of len */
1196 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1198 PageDesc *p;
1199 int offset, b;
1200 #if 0
1201 if (1) {
1202 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1203 cpu_single_env->mem_io_vaddr, len,
1204 cpu_single_env->eip,
1205 cpu_single_env->eip +
1206 (intptr_t)cpu_single_env->segs[R_CS].base);
1208 #endif
1209 p = page_find(start >> TARGET_PAGE_BITS);
1210 if (!p)
1211 return;
1212 if (p->code_bitmap) {
1213 offset = start & ~TARGET_PAGE_MASK;
1214 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1215 if (b & ((1 << len) - 1))
1216 goto do_invalidate;
1217 } else {
1218 do_invalidate:
1219 tb_invalidate_phys_page_range(start, start + len, 1);
1223 #if !defined(CONFIG_SOFTMMU)
1224 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1225 uintptr_t pc, void *puc)
1227 TranslationBlock *tb;
1228 PageDesc *p;
1229 int n;
1230 #ifdef TARGET_HAS_PRECISE_SMC
1231 TranslationBlock *current_tb = NULL;
1232 CPUArchState *env = cpu_single_env;
1233 int current_tb_modified = 0;
1234 target_ulong current_pc = 0;
1235 target_ulong current_cs_base = 0;
1236 int current_flags = 0;
1237 #endif
1239 addr &= TARGET_PAGE_MASK;
1240 p = page_find(addr >> TARGET_PAGE_BITS);
1241 if (!p)
1242 return;
1243 tb = p->first_tb;
1244 #ifdef TARGET_HAS_PRECISE_SMC
1245 if (tb && pc != 0) {
1246 current_tb = tb_find_pc(pc);
1248 #endif
1249 while (tb != NULL) {
1250 n = (uintptr_t)tb & 3;
1251 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1252 #ifdef TARGET_HAS_PRECISE_SMC
1253 if (current_tb == tb &&
1254 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1255 /* If we are modifying the current TB, we must stop
1256 its execution. We could be more precise by checking
1257 that the modification is after the current PC, but it
1258 would require a specialized function to partially
1259 restore the CPU state */
1261 current_tb_modified = 1;
1262 cpu_restore_state(current_tb, env, pc);
1263 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1264 &current_flags);
1266 #endif /* TARGET_HAS_PRECISE_SMC */
1267 tb_phys_invalidate(tb, addr);
1268 tb = tb->page_next[n];
1270 p->first_tb = NULL;
1271 #ifdef TARGET_HAS_PRECISE_SMC
1272 if (current_tb_modified) {
1273 /* we generate a block containing just the instruction
1274 modifying the memory. It will ensure that it cannot modify
1275 itself */
1276 env->current_tb = NULL;
1277 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1278 cpu_resume_from_signal(env, puc);
1280 #endif
1282 #endif
1284 /* add the tb in the target page and protect it if necessary */
1285 static inline void tb_alloc_page(TranslationBlock *tb,
1286 unsigned int n, tb_page_addr_t page_addr)
1288 PageDesc *p;
1289 #ifndef CONFIG_USER_ONLY
1290 bool page_already_protected;
1291 #endif
1293 tb->page_addr[n] = page_addr;
1294 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1295 tb->page_next[n] = p->first_tb;
1296 #ifndef CONFIG_USER_ONLY
1297 page_already_protected = p->first_tb != NULL;
1298 #endif
1299 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1300 invalidate_page_bitmap(p);
1302 #if defined(TARGET_HAS_SMC) || 1
1304 #if defined(CONFIG_USER_ONLY)
1305 if (p->flags & PAGE_WRITE) {
1306 target_ulong addr;
1307 PageDesc *p2;
1308 int prot;
1310 /* force the host page as non writable (writes will have a
1311 page fault + mprotect overhead) */
1312 page_addr &= qemu_host_page_mask;
1313 prot = 0;
1314 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1315 addr += TARGET_PAGE_SIZE) {
1317 p2 = page_find (addr >> TARGET_PAGE_BITS);
1318 if (!p2)
1319 continue;
1320 prot |= p2->flags;
1321 p2->flags &= ~PAGE_WRITE;
1323 mprotect(g2h(page_addr), qemu_host_page_size,
1324 (prot & PAGE_BITS) & ~PAGE_WRITE);
1325 #ifdef DEBUG_TB_INVALIDATE
1326 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1327 page_addr);
1328 #endif
1330 #else
1331 /* if some code is already present, then the pages are already
1332 protected. So we handle the case where only the first TB is
1333 allocated in a physical page */
1334 if (!page_already_protected) {
1335 tlb_protect_code(page_addr);
1337 #endif
1339 #endif /* TARGET_HAS_SMC */
1342 /* add a new TB and link it to the physical page tables. phys_page2 is
1343 (-1) to indicate that only one page contains the TB. */
1344 void tb_link_page(TranslationBlock *tb,
1345 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1347 unsigned int h;
1348 TranslationBlock **ptb;
1350 /* Grab the mmap lock to stop another thread invalidating this TB
1351 before we are done. */
1352 mmap_lock();
1353 /* add in the physical hash table */
1354 h = tb_phys_hash_func(phys_pc);
1355 ptb = &tb_phys_hash[h];
1356 tb->phys_hash_next = *ptb;
1357 *ptb = tb;
1359 /* add in the page list */
1360 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1361 if (phys_page2 != -1)
1362 tb_alloc_page(tb, 1, phys_page2);
1363 else
1364 tb->page_addr[1] = -1;
1366 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1367 tb->jmp_next[0] = NULL;
1368 tb->jmp_next[1] = NULL;
1370 /* init original jump addresses */
1371 if (tb->tb_next_offset[0] != 0xffff)
1372 tb_reset_jump(tb, 0);
1373 if (tb->tb_next_offset[1] != 0xffff)
1374 tb_reset_jump(tb, 1);
1376 #ifdef DEBUG_TB_CHECK
1377 tb_page_check();
1378 #endif
1379 mmap_unlock();
1382 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1383 tb[1].tc_ptr. Return NULL if not found */
1384 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1386 int m_min, m_max, m;
1387 uintptr_t v;
1388 TranslationBlock *tb;
1390 if (nb_tbs <= 0)
1391 return NULL;
1392 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1393 tc_ptr >= (uintptr_t)code_gen_ptr) {
1394 return NULL;
1396 /* binary search (cf Knuth) */
1397 m_min = 0;
1398 m_max = nb_tbs - 1;
1399 while (m_min <= m_max) {
1400 m = (m_min + m_max) >> 1;
1401 tb = &tbs[m];
1402 v = (uintptr_t)tb->tc_ptr;
1403 if (v == tc_ptr)
1404 return tb;
1405 else if (tc_ptr < v) {
1406 m_max = m - 1;
1407 } else {
1408 m_min = m + 1;
1411 return &tbs[m_max];
1414 static void tb_reset_jump_recursive(TranslationBlock *tb);
1416 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1418 TranslationBlock *tb1, *tb_next, **ptb;
1419 unsigned int n1;
1421 tb1 = tb->jmp_next[n];
1422 if (tb1 != NULL) {
1423 /* find head of list */
1424 for(;;) {
1425 n1 = (uintptr_t)tb1 & 3;
1426 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1427 if (n1 == 2)
1428 break;
1429 tb1 = tb1->jmp_next[n1];
1431 /* we are now sure now that tb jumps to tb1 */
1432 tb_next = tb1;
1434 /* remove tb from the jmp_first list */
1435 ptb = &tb_next->jmp_first;
1436 for(;;) {
1437 tb1 = *ptb;
1438 n1 = (uintptr_t)tb1 & 3;
1439 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1440 if (n1 == n && tb1 == tb)
1441 break;
1442 ptb = &tb1->jmp_next[n1];
1444 *ptb = tb->jmp_next[n];
1445 tb->jmp_next[n] = NULL;
1447 /* suppress the jump to next tb in generated code */
1448 tb_reset_jump(tb, n);
1450 /* suppress jumps in the tb on which we could have jumped */
1451 tb_reset_jump_recursive(tb_next);
1455 static void tb_reset_jump_recursive(TranslationBlock *tb)
1457 tb_reset_jump_recursive2(tb, 0);
1458 tb_reset_jump_recursive2(tb, 1);
1461 #if defined(TARGET_HAS_ICE)
1462 #if defined(CONFIG_USER_ONLY)
1463 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1465 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1467 #else
1468 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1470 ram_addr_t ram_addr;
1471 MemoryRegionSection *section;
1473 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1474 if (!(memory_region_is_ram(section->mr)
1475 || (section->mr->rom_device && section->mr->readable))) {
1476 return;
1478 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1479 + memory_region_section_addr(section, addr);
1480 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1483 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1485 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1486 (pc & ~TARGET_PAGE_MASK));
1488 #endif
1489 #endif /* TARGET_HAS_ICE */
1491 #if defined(CONFIG_USER_ONLY)
1492 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1497 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1498 int flags, CPUWatchpoint **watchpoint)
1500 return -ENOSYS;
1502 #else
1503 /* Add a watchpoint. */
1504 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1505 int flags, CPUWatchpoint **watchpoint)
1507 target_ulong len_mask = ~(len - 1);
1508 CPUWatchpoint *wp;
1510 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1511 if ((len & (len - 1)) || (addr & ~len_mask) ||
1512 len == 0 || len > TARGET_PAGE_SIZE) {
1513 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1514 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1515 return -EINVAL;
1517 wp = g_malloc(sizeof(*wp));
1519 wp->vaddr = addr;
1520 wp->len_mask = len_mask;
1521 wp->flags = flags;
1523 /* keep all GDB-injected watchpoints in front */
1524 if (flags & BP_GDB)
1525 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1526 else
1527 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1529 tlb_flush_page(env, addr);
1531 if (watchpoint)
1532 *watchpoint = wp;
1533 return 0;
1536 /* Remove a specific watchpoint. */
1537 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1538 int flags)
1540 target_ulong len_mask = ~(len - 1);
1541 CPUWatchpoint *wp;
1543 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1544 if (addr == wp->vaddr && len_mask == wp->len_mask
1545 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1546 cpu_watchpoint_remove_by_ref(env, wp);
1547 return 0;
1550 return -ENOENT;
1553 /* Remove a specific watchpoint by reference. */
1554 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1556 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1558 tlb_flush_page(env, watchpoint->vaddr);
1560 g_free(watchpoint);
1563 /* Remove all matching watchpoints. */
1564 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1566 CPUWatchpoint *wp, *next;
1568 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1569 if (wp->flags & mask)
1570 cpu_watchpoint_remove_by_ref(env, wp);
1573 #endif
1575 /* Add a breakpoint. */
1576 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1577 CPUBreakpoint **breakpoint)
1579 #if defined(TARGET_HAS_ICE)
1580 CPUBreakpoint *bp;
1582 bp = g_malloc(sizeof(*bp));
1584 bp->pc = pc;
1585 bp->flags = flags;
1587 /* keep all GDB-injected breakpoints in front */
1588 if (flags & BP_GDB)
1589 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1590 else
1591 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1593 breakpoint_invalidate(env, pc);
1595 if (breakpoint)
1596 *breakpoint = bp;
1597 return 0;
1598 #else
1599 return -ENOSYS;
1600 #endif
1603 /* Remove a specific breakpoint. */
1604 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1606 #if defined(TARGET_HAS_ICE)
1607 CPUBreakpoint *bp;
1609 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1610 if (bp->pc == pc && bp->flags == flags) {
1611 cpu_breakpoint_remove_by_ref(env, bp);
1612 return 0;
1615 return -ENOENT;
1616 #else
1617 return -ENOSYS;
1618 #endif
1621 /* Remove a specific breakpoint by reference. */
1622 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1624 #if defined(TARGET_HAS_ICE)
1625 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1627 breakpoint_invalidate(env, breakpoint->pc);
1629 g_free(breakpoint);
1630 #endif
1633 /* Remove all matching breakpoints. */
1634 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1636 #if defined(TARGET_HAS_ICE)
1637 CPUBreakpoint *bp, *next;
1639 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1640 if (bp->flags & mask)
1641 cpu_breakpoint_remove_by_ref(env, bp);
1643 #endif
1646 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1647 CPU loop after each instruction */
1648 void cpu_single_step(CPUArchState *env, int enabled)
1650 #if defined(TARGET_HAS_ICE)
1651 if (env->singlestep_enabled != enabled) {
1652 env->singlestep_enabled = enabled;
1653 if (kvm_enabled())
1654 kvm_update_guest_debug(env, 0);
1655 else {
1656 /* must flush all the translated code to avoid inconsistencies */
1657 /* XXX: only flush what is necessary */
1658 tb_flush(env);
1661 #endif
1664 static void cpu_unlink_tb(CPUArchState *env)
1666 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1667 problem and hope the cpu will stop of its own accord. For userspace
1668 emulation this often isn't actually as bad as it sounds. Often
1669 signals are used primarily to interrupt blocking syscalls. */
1670 TranslationBlock *tb;
1671 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1673 spin_lock(&interrupt_lock);
1674 tb = env->current_tb;
1675 /* if the cpu is currently executing code, we must unlink it and
1676 all the potentially executing TB */
1677 if (tb) {
1678 env->current_tb = NULL;
1679 tb_reset_jump_recursive(tb);
1681 spin_unlock(&interrupt_lock);
1684 #ifndef CONFIG_USER_ONLY
1685 /* mask must never be zero, except for A20 change call */
1686 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1688 int old_mask;
1690 old_mask = env->interrupt_request;
1691 env->interrupt_request |= mask;
1694 * If called from iothread context, wake the target cpu in
1695 * case its halted.
1697 if (!qemu_cpu_is_self(env)) {
1698 qemu_cpu_kick(env);
1699 return;
1702 if (use_icount) {
1703 env->icount_decr.u16.high = 0xffff;
1704 if (!can_do_io(env)
1705 && (mask & ~old_mask) != 0) {
1706 cpu_abort(env, "Raised interrupt while not in I/O function");
1708 } else {
1709 cpu_unlink_tb(env);
1713 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1715 #else /* CONFIG_USER_ONLY */
1717 void cpu_interrupt(CPUArchState *env, int mask)
1719 env->interrupt_request |= mask;
1720 cpu_unlink_tb(env);
1722 #endif /* CONFIG_USER_ONLY */
1724 void cpu_reset_interrupt(CPUArchState *env, int mask)
1726 env->interrupt_request &= ~mask;
1729 void cpu_exit(CPUArchState *env)
1731 env->exit_request = 1;
1732 cpu_unlink_tb(env);
1735 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1737 va_list ap;
1738 va_list ap2;
1740 va_start(ap, fmt);
1741 va_copy(ap2, ap);
1742 fprintf(stderr, "qemu: fatal: ");
1743 vfprintf(stderr, fmt, ap);
1744 fprintf(stderr, "\n");
1745 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1746 if (qemu_log_enabled()) {
1747 qemu_log("qemu: fatal: ");
1748 qemu_log_vprintf(fmt, ap2);
1749 qemu_log("\n");
1750 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1751 qemu_log_flush();
1752 qemu_log_close();
1754 va_end(ap2);
1755 va_end(ap);
1756 #if defined(CONFIG_USER_ONLY)
1758 struct sigaction act;
1759 sigfillset(&act.sa_mask);
1760 act.sa_handler = SIG_DFL;
1761 sigaction(SIGABRT, &act, NULL);
1763 #endif
1764 abort();
1767 CPUArchState *cpu_copy(CPUArchState *env)
1769 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1770 CPUArchState *next_cpu = new_env->next_cpu;
1771 int cpu_index = new_env->cpu_index;
1772 #if defined(TARGET_HAS_ICE)
1773 CPUBreakpoint *bp;
1774 CPUWatchpoint *wp;
1775 #endif
1777 memcpy(new_env, env, sizeof(CPUArchState));
1779 /* Preserve chaining and index. */
1780 new_env->next_cpu = next_cpu;
1781 new_env->cpu_index = cpu_index;
1783 /* Clone all break/watchpoints.
1784 Note: Once we support ptrace with hw-debug register access, make sure
1785 BP_CPU break/watchpoints are handled correctly on clone. */
1786 QTAILQ_INIT(&env->breakpoints);
1787 QTAILQ_INIT(&env->watchpoints);
1788 #if defined(TARGET_HAS_ICE)
1789 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1790 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1792 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1793 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1794 wp->flags, NULL);
1796 #endif
1798 return new_env;
1801 #if !defined(CONFIG_USER_ONLY)
1802 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1804 unsigned int i;
1806 /* Discard jump cache entries for any tb which might potentially
1807 overlap the flushed page. */
1808 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1809 memset (&env->tb_jmp_cache[i], 0,
1810 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1812 i = tb_jmp_cache_hash_page(addr);
1813 memset (&env->tb_jmp_cache[i], 0,
1814 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1817 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1818 uintptr_t length)
1820 uintptr_t start1;
1822 /* we modify the TLB cache so that the dirty bit will be set again
1823 when accessing the range */
1824 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1825 /* Check that we don't span multiple blocks - this breaks the
1826 address comparisons below. */
1827 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1828 != (end - 1) - start) {
1829 abort();
1831 cpu_tlb_reset_dirty_all(start1, length);
1835 /* Note: start and end must be within the same ram block. */
1836 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1837 int dirty_flags)
1839 uintptr_t length;
1841 start &= TARGET_PAGE_MASK;
1842 end = TARGET_PAGE_ALIGN(end);
1844 length = end - start;
1845 if (length == 0)
1846 return;
1847 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1849 if (tcg_enabled()) {
1850 tlb_reset_dirty_range_all(start, end, length);
1854 int cpu_physical_memory_set_dirty_tracking(int enable)
1856 int ret = 0;
1857 in_migration = enable;
1858 return ret;
1861 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1862 MemoryRegionSection *section,
1863 target_ulong vaddr,
1864 target_phys_addr_t paddr,
1865 int prot,
1866 target_ulong *address)
1868 target_phys_addr_t iotlb;
1869 CPUWatchpoint *wp;
1871 if (memory_region_is_ram(section->mr)) {
1872 /* Normal RAM. */
1873 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1874 + memory_region_section_addr(section, paddr);
1875 if (!section->readonly) {
1876 iotlb |= phys_section_notdirty;
1877 } else {
1878 iotlb |= phys_section_rom;
1880 } else {
1881 /* IO handlers are currently passed a physical address.
1882 It would be nice to pass an offset from the base address
1883 of that region. This would avoid having to special case RAM,
1884 and avoid full address decoding in every device.
1885 We can't use the high bits of pd for this because
1886 IO_MEM_ROMD uses these as a ram address. */
1887 iotlb = section - phys_sections;
1888 iotlb += memory_region_section_addr(section, paddr);
1891 /* Make accesses to pages with watchpoints go via the
1892 watchpoint trap routines. */
1893 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1894 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1895 /* Avoid trapping reads of pages with a write breakpoint. */
1896 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1897 iotlb = phys_section_watch + paddr;
1898 *address |= TLB_MMIO;
1899 break;
1904 return iotlb;
1907 #else
1909 * Walks guest process memory "regions" one by one
1910 * and calls callback function 'fn' for each region.
1913 struct walk_memory_regions_data
1915 walk_memory_regions_fn fn;
1916 void *priv;
1917 uintptr_t start;
1918 int prot;
1921 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1922 abi_ulong end, int new_prot)
1924 if (data->start != -1ul) {
1925 int rc = data->fn(data->priv, data->start, end, data->prot);
1926 if (rc != 0) {
1927 return rc;
1931 data->start = (new_prot ? end : -1ul);
1932 data->prot = new_prot;
1934 return 0;
1937 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1938 abi_ulong base, int level, void **lp)
1940 abi_ulong pa;
1941 int i, rc;
1943 if (*lp == NULL) {
1944 return walk_memory_regions_end(data, base, 0);
1947 if (level == 0) {
1948 PageDesc *pd = *lp;
1949 for (i = 0; i < L2_SIZE; ++i) {
1950 int prot = pd[i].flags;
1952 pa = base | (i << TARGET_PAGE_BITS);
1953 if (prot != data->prot) {
1954 rc = walk_memory_regions_end(data, pa, prot);
1955 if (rc != 0) {
1956 return rc;
1960 } else {
1961 void **pp = *lp;
1962 for (i = 0; i < L2_SIZE; ++i) {
1963 pa = base | ((abi_ulong)i <<
1964 (TARGET_PAGE_BITS + L2_BITS * level));
1965 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1966 if (rc != 0) {
1967 return rc;
1972 return 0;
1975 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1977 struct walk_memory_regions_data data;
1978 uintptr_t i;
1980 data.fn = fn;
1981 data.priv = priv;
1982 data.start = -1ul;
1983 data.prot = 0;
1985 for (i = 0; i < V_L1_SIZE; i++) {
1986 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1987 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1988 if (rc != 0) {
1989 return rc;
1993 return walk_memory_regions_end(&data, 0, 0);
1996 static int dump_region(void *priv, abi_ulong start,
1997 abi_ulong end, unsigned long prot)
1999 FILE *f = (FILE *)priv;
2001 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2002 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2003 start, end, end - start,
2004 ((prot & PAGE_READ) ? 'r' : '-'),
2005 ((prot & PAGE_WRITE) ? 'w' : '-'),
2006 ((prot & PAGE_EXEC) ? 'x' : '-'));
2008 return (0);
2011 /* dump memory mappings */
2012 void page_dump(FILE *f)
2014 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2015 "start", "end", "size", "prot");
2016 walk_memory_regions(f, dump_region);
2019 int page_get_flags(target_ulong address)
2021 PageDesc *p;
2023 p = page_find(address >> TARGET_PAGE_BITS);
2024 if (!p)
2025 return 0;
2026 return p->flags;
2029 /* Modify the flags of a page and invalidate the code if necessary.
2030 The flag PAGE_WRITE_ORG is positioned automatically depending
2031 on PAGE_WRITE. The mmap_lock should already be held. */
2032 void page_set_flags(target_ulong start, target_ulong end, int flags)
2034 target_ulong addr, len;
2036 /* This function should never be called with addresses outside the
2037 guest address space. If this assert fires, it probably indicates
2038 a missing call to h2g_valid. */
2039 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2040 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2041 #endif
2042 assert(start < end);
2044 start = start & TARGET_PAGE_MASK;
2045 end = TARGET_PAGE_ALIGN(end);
2047 if (flags & PAGE_WRITE) {
2048 flags |= PAGE_WRITE_ORG;
2051 for (addr = start, len = end - start;
2052 len != 0;
2053 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2054 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2056 /* If the write protection bit is set, then we invalidate
2057 the code inside. */
2058 if (!(p->flags & PAGE_WRITE) &&
2059 (flags & PAGE_WRITE) &&
2060 p->first_tb) {
2061 tb_invalidate_phys_page(addr, 0, NULL);
2063 p->flags = flags;
2067 int page_check_range(target_ulong start, target_ulong len, int flags)
2069 PageDesc *p;
2070 target_ulong end;
2071 target_ulong addr;
2073 /* This function should never be called with addresses outside the
2074 guest address space. If this assert fires, it probably indicates
2075 a missing call to h2g_valid. */
2076 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2077 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2078 #endif
2080 if (len == 0) {
2081 return 0;
2083 if (start + len - 1 < start) {
2084 /* We've wrapped around. */
2085 return -1;
2088 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2089 start = start & TARGET_PAGE_MASK;
2091 for (addr = start, len = end - start;
2092 len != 0;
2093 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2094 p = page_find(addr >> TARGET_PAGE_BITS);
2095 if( !p )
2096 return -1;
2097 if( !(p->flags & PAGE_VALID) )
2098 return -1;
2100 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2101 return -1;
2102 if (flags & PAGE_WRITE) {
2103 if (!(p->flags & PAGE_WRITE_ORG))
2104 return -1;
2105 /* unprotect the page if it was put read-only because it
2106 contains translated code */
2107 if (!(p->flags & PAGE_WRITE)) {
2108 if (!page_unprotect(addr, 0, NULL))
2109 return -1;
2111 return 0;
2114 return 0;
2117 /* called from signal handler: invalidate the code and unprotect the
2118 page. Return TRUE if the fault was successfully handled. */
2119 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2121 unsigned int prot;
2122 PageDesc *p;
2123 target_ulong host_start, host_end, addr;
2125 /* Technically this isn't safe inside a signal handler. However we
2126 know this only ever happens in a synchronous SEGV handler, so in
2127 practice it seems to be ok. */
2128 mmap_lock();
2130 p = page_find(address >> TARGET_PAGE_BITS);
2131 if (!p) {
2132 mmap_unlock();
2133 return 0;
2136 /* if the page was really writable, then we change its
2137 protection back to writable */
2138 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2139 host_start = address & qemu_host_page_mask;
2140 host_end = host_start + qemu_host_page_size;
2142 prot = 0;
2143 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2144 p = page_find(addr >> TARGET_PAGE_BITS);
2145 p->flags |= PAGE_WRITE;
2146 prot |= p->flags;
2148 /* and since the content will be modified, we must invalidate
2149 the corresponding translated code. */
2150 tb_invalidate_phys_page(addr, pc, puc);
2151 #ifdef DEBUG_TB_CHECK
2152 tb_invalidate_check(addr);
2153 #endif
2155 mprotect((void *)g2h(host_start), qemu_host_page_size,
2156 prot & PAGE_BITS);
2158 mmap_unlock();
2159 return 1;
2161 mmap_unlock();
2162 return 0;
2164 #endif /* defined(CONFIG_USER_ONLY) */
2166 #if !defined(CONFIG_USER_ONLY)
2168 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2169 typedef struct subpage_t {
2170 MemoryRegion iomem;
2171 target_phys_addr_t base;
2172 uint16_t sub_section[TARGET_PAGE_SIZE];
2173 } subpage_t;
2175 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2176 uint16_t section);
2177 static subpage_t *subpage_init(target_phys_addr_t base);
2178 static void destroy_page_desc(uint16_t section_index)
2180 MemoryRegionSection *section = &phys_sections[section_index];
2181 MemoryRegion *mr = section->mr;
2183 if (mr->subpage) {
2184 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2185 memory_region_destroy(&subpage->iomem);
2186 g_free(subpage);
2190 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2192 unsigned i;
2193 PhysPageEntry *p;
2195 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2196 return;
2199 p = phys_map_nodes[lp->ptr];
2200 for (i = 0; i < L2_SIZE; ++i) {
2201 if (!p[i].is_leaf) {
2202 destroy_l2_mapping(&p[i], level - 1);
2203 } else {
2204 destroy_page_desc(p[i].ptr);
2207 lp->is_leaf = 0;
2208 lp->ptr = PHYS_MAP_NODE_NIL;
2211 static void destroy_all_mappings(void)
2213 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2214 phys_map_nodes_reset();
2217 static uint16_t phys_section_add(MemoryRegionSection *section)
2219 if (phys_sections_nb == phys_sections_nb_alloc) {
2220 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2221 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2222 phys_sections_nb_alloc);
2224 phys_sections[phys_sections_nb] = *section;
2225 return phys_sections_nb++;
2228 static void phys_sections_clear(void)
2230 phys_sections_nb = 0;
2233 static void register_subpage(MemoryRegionSection *section)
2235 subpage_t *subpage;
2236 target_phys_addr_t base = section->offset_within_address_space
2237 & TARGET_PAGE_MASK;
2238 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2239 MemoryRegionSection subsection = {
2240 .offset_within_address_space = base,
2241 .size = TARGET_PAGE_SIZE,
2243 target_phys_addr_t start, end;
2245 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2247 if (!(existing->mr->subpage)) {
2248 subpage = subpage_init(base);
2249 subsection.mr = &subpage->iomem;
2250 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2251 phys_section_add(&subsection));
2252 } else {
2253 subpage = container_of(existing->mr, subpage_t, iomem);
2255 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2256 end = start + section->size - 1;
2257 subpage_register(subpage, start, end, phys_section_add(section));
2261 static void register_multipage(MemoryRegionSection *section)
2263 target_phys_addr_t start_addr = section->offset_within_address_space;
2264 ram_addr_t size = section->size;
2265 target_phys_addr_t addr;
2266 uint16_t section_index = phys_section_add(section);
2268 assert(size);
2270 addr = start_addr;
2271 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2272 section_index);
2275 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2276 bool readonly)
2278 MemoryRegionSection now = *section, remain = *section;
2280 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2281 || (now.size < TARGET_PAGE_SIZE)) {
2282 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2283 - now.offset_within_address_space,
2284 now.size);
2285 register_subpage(&now);
2286 remain.size -= now.size;
2287 remain.offset_within_address_space += now.size;
2288 remain.offset_within_region += now.size;
2290 while (remain.size >= TARGET_PAGE_SIZE) {
2291 now = remain;
2292 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2293 now.size = TARGET_PAGE_SIZE;
2294 register_subpage(&now);
2295 } else {
2296 now.size &= TARGET_PAGE_MASK;
2297 register_multipage(&now);
2299 remain.size -= now.size;
2300 remain.offset_within_address_space += now.size;
2301 remain.offset_within_region += now.size;
2303 now = remain;
2304 if (now.size) {
2305 register_subpage(&now);
2310 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2312 if (kvm_enabled())
2313 kvm_coalesce_mmio_region(addr, size);
2316 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2318 if (kvm_enabled())
2319 kvm_uncoalesce_mmio_region(addr, size);
/* With KVM enabled, flush the coalesced MMIO buffer. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2328 #if defined(__linux__) && !defined(TARGET_S390X)
2330 #include <sys/vfs.h>
2332 #define HUGETLBFS_MAGIC 0x958458f6
2334 static long gethugepagesize(const char *path)
2336 struct statfs fs;
2337 int ret;
2339 do {
2340 ret = statfs(path, &fs);
2341 } while (ret != 0 && errno == EINTR);
2343 if (ret != 0) {
2344 perror(path);
2345 return 0;
2348 if (fs.f_type != HUGETLBFS_MAGIC)
2349 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2351 return fs.f_bsize;
2354 static void *file_ram_alloc(RAMBlock *block,
2355 ram_addr_t memory,
2356 const char *path)
2358 char *filename;
2359 void *area;
2360 int fd;
2361 #ifdef MAP_POPULATE
2362 int flags;
2363 #endif
2364 unsigned long hpagesize;
2366 hpagesize = gethugepagesize(path);
2367 if (!hpagesize) {
2368 return NULL;
2371 if (memory < hpagesize) {
2372 return NULL;
2375 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2376 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2377 return NULL;
2380 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2381 return NULL;
2384 fd = mkstemp(filename);
2385 if (fd < 0) {
2386 perror("unable to create backing store for hugepages");
2387 free(filename);
2388 return NULL;
2390 unlink(filename);
2391 free(filename);
2393 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2396 * ftruncate is not supported by hugetlbfs in older
2397 * hosts, so don't bother bailing out on errors.
2398 * If anything goes wrong with it under other filesystems,
2399 * mmap will fail.
2401 if (ftruncate(fd, memory))
2402 perror("ftruncate");
2404 #ifdef MAP_POPULATE
2405 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2406 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2407 * to sidestep this quirk.
2409 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2410 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2411 #else
2412 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2413 #endif
2414 if (area == MAP_FAILED) {
2415 perror("file_ram_alloc: can't mmap RAM pages");
2416 close(fd);
2417 return (NULL);
2419 block->fd = fd;
2420 return area;
2422 #endif
2424 static ram_addr_t find_ram_offset(ram_addr_t size)
2426 RAMBlock *block, *next_block;
2427 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2429 if (QLIST_EMPTY(&ram_list.blocks))
2430 return 0;
2432 QLIST_FOREACH(block, &ram_list.blocks, next) {
2433 ram_addr_t end, next = RAM_ADDR_MAX;
2435 end = block->offset + block->length;
2437 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2438 if (next_block->offset >= end) {
2439 next = MIN(next, next_block->offset);
2442 if (next - end >= size && next - end < mingap) {
2443 offset = end;
2444 mingap = next - end;
2448 if (offset == RAM_ADDR_MAX) {
2449 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2450 (uint64_t)size);
2451 abort();
2454 return offset;
2457 static ram_addr_t last_ram_offset(void)
2459 RAMBlock *block;
2460 ram_addr_t last = 0;
2462 QLIST_FOREACH(block, &ram_list.blocks, next)
2463 last = MAX(last, block->offset + block->length);
2465 return last;
2468 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2470 int ret;
2471 QemuOpts *machine_opts;
2473 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2474 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2475 if (machine_opts &&
2476 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2477 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2478 if (ret) {
2479 perror("qemu_madvise");
2480 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2481 "but dump_guest_core=off specified\n");
2486 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2488 RAMBlock *new_block, *block;
2490 new_block = NULL;
2491 QLIST_FOREACH(block, &ram_list.blocks, next) {
2492 if (block->offset == addr) {
2493 new_block = block;
2494 break;
2497 assert(new_block);
2498 assert(!new_block->idstr[0]);
2500 if (dev) {
2501 char *id = qdev_get_dev_path(dev);
2502 if (id) {
2503 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2504 g_free(id);
2507 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2509 QLIST_FOREACH(block, &ram_list.blocks, next) {
2510 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2511 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2512 new_block->idstr);
2513 abort();
2518 static int memory_try_enable_merging(void *addr, size_t len)
2520 QemuOpts *opts;
2522 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2523 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2524 /* disabled by the user */
2525 return 0;
2528 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2531 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2532 MemoryRegion *mr)
2534 RAMBlock *new_block;
2536 size = TARGET_PAGE_ALIGN(size);
2537 new_block = g_malloc0(sizeof(*new_block));
2539 new_block->mr = mr;
2540 new_block->offset = find_ram_offset(size);
2541 if (host) {
2542 new_block->host = host;
2543 new_block->flags |= RAM_PREALLOC_MASK;
2544 } else {
2545 if (mem_path) {
2546 #if defined (__linux__) && !defined(TARGET_S390X)
2547 new_block->host = file_ram_alloc(new_block, size, mem_path);
2548 if (!new_block->host) {
2549 new_block->host = qemu_vmalloc(size);
2550 memory_try_enable_merging(new_block->host, size);
2552 #else
2553 fprintf(stderr, "-mem-path option unsupported\n");
2554 exit(1);
2555 #endif
2556 } else {
2557 if (xen_enabled()) {
2558 xen_ram_alloc(new_block->offset, size, mr);
2559 } else if (kvm_enabled()) {
2560 /* some s390/kvm configurations have special constraints */
2561 new_block->host = kvm_vmalloc(size);
2562 } else {
2563 new_block->host = qemu_vmalloc(size);
2565 memory_try_enable_merging(new_block->host, size);
2568 new_block->length = size;
2570 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2572 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2573 last_ram_offset() >> TARGET_PAGE_BITS);
2574 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2575 0, size >> TARGET_PAGE_BITS);
2576 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2578 qemu_ram_setup_dump(new_block->host, size);
2580 if (kvm_enabled())
2581 kvm_setup_guest_memory(new_block->host, size);
2583 return new_block->offset;
2586 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2588 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2591 void qemu_ram_free_from_ptr(ram_addr_t addr)
2593 RAMBlock *block;
2595 QLIST_FOREACH(block, &ram_list.blocks, next) {
2596 if (addr == block->offset) {
2597 QLIST_REMOVE(block, next);
2598 g_free(block);
2599 return;
2604 void qemu_ram_free(ram_addr_t addr)
2606 RAMBlock *block;
2608 QLIST_FOREACH(block, &ram_list.blocks, next) {
2609 if (addr == block->offset) {
2610 QLIST_REMOVE(block, next);
2611 if (block->flags & RAM_PREALLOC_MASK) {
2613 } else if (mem_path) {
2614 #if defined (__linux__) && !defined(TARGET_S390X)
2615 if (block->fd) {
2616 munmap(block->host, block->length);
2617 close(block->fd);
2618 } else {
2619 qemu_vfree(block->host);
2621 #else
2622 abort();
2623 #endif
2624 } else {
2625 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2626 munmap(block->host, block->length);
2627 #else
2628 if (xen_enabled()) {
2629 xen_invalidate_map_cache_entry(block->host);
2630 } else {
2631 qemu_vfree(block->host);
2633 #endif
2635 g_free(block);
2636 return;
2642 #ifndef _WIN32
2643 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2645 RAMBlock *block;
2646 ram_addr_t offset;
2647 int flags;
2648 void *area, *vaddr;
2650 QLIST_FOREACH(block, &ram_list.blocks, next) {
2651 offset = addr - block->offset;
2652 if (offset < block->length) {
2653 vaddr = block->host + offset;
2654 if (block->flags & RAM_PREALLOC_MASK) {
2656 } else {
2657 flags = MAP_FIXED;
2658 munmap(vaddr, length);
2659 if (mem_path) {
2660 #if defined(__linux__) && !defined(TARGET_S390X)
2661 if (block->fd) {
2662 #ifdef MAP_POPULATE
2663 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2664 MAP_PRIVATE;
2665 #else
2666 flags |= MAP_PRIVATE;
2667 #endif
2668 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2669 flags, block->fd, offset);
2670 } else {
2671 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2672 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2673 flags, -1, 0);
2675 #else
2676 abort();
2677 #endif
2678 } else {
2679 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2680 flags |= MAP_SHARED | MAP_ANONYMOUS;
2681 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2682 flags, -1, 0);
2683 #else
2684 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2685 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2686 flags, -1, 0);
2687 #endif
2689 if (area != vaddr) {
2690 fprintf(stderr, "Could not remap addr: "
2691 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2692 length, addr);
2693 exit(1);
2695 memory_try_enable_merging(vaddr, length);
2696 qemu_ram_setup_dump(vaddr, length);
2698 return;
2702 #endif /* !_WIN32 */
2704 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2705 With the exception of the softmmu code in this file, this should
2706 only be used for local memory (e.g. video ram) that the device owns,
2707 and knows it isn't going to access beyond the end of the block.
2709 It should not be used for general purpose DMA.
2710 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2712 void *qemu_get_ram_ptr(ram_addr_t addr)
2714 RAMBlock *block;
2716 QLIST_FOREACH(block, &ram_list.blocks, next) {
2717 if (addr - block->offset < block->length) {
2718 /* Move this entry to to start of the list. */
2719 if (block != QLIST_FIRST(&ram_list.blocks)) {
2720 QLIST_REMOVE(block, next);
2721 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2723 if (xen_enabled()) {
2724 /* We need to check if the requested address is in the RAM
2725 * because we don't want to map the entire memory in QEMU.
2726 * In that case just map until the end of the page.
2728 if (block->offset == 0) {
2729 return xen_map_cache(addr, 0, 0);
2730 } else if (block->host == NULL) {
2731 block->host =
2732 xen_map_cache(block->offset, block->length, 1);
2735 return block->host + (addr - block->offset);
2739 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2740 abort();
2742 return NULL;
2745 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2746 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2748 void *qemu_safe_ram_ptr(ram_addr_t addr)
2750 RAMBlock *block;
2752 QLIST_FOREACH(block, &ram_list.blocks, next) {
2753 if (addr - block->offset < block->length) {
2754 if (xen_enabled()) {
2755 /* We need to check if the requested address is in the RAM
2756 * because we don't want to map the entire memory in QEMU.
2757 * In that case just map until the end of the page.
2759 if (block->offset == 0) {
2760 return xen_map_cache(addr, 0, 0);
2761 } else if (block->host == NULL) {
2762 block->host =
2763 xen_map_cache(block->offset, block->length, 1);
2766 return block->host + (addr - block->offset);
2770 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2771 abort();
2773 return NULL;
2776 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2777 * but takes a size argument */
2778 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2780 if (*size == 0) {
2781 return NULL;
2783 if (xen_enabled()) {
2784 return xen_map_cache(addr, *size, 1);
2785 } else {
2786 RAMBlock *block;
2788 QLIST_FOREACH(block, &ram_list.blocks, next) {
2789 if (addr - block->offset < block->length) {
2790 if (addr - block->offset + *size > block->length)
2791 *size = block->length - addr + block->offset;
2792 return block->host + (addr - block->offset);
2796 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2797 abort();
/* Counterpart of qemu_get_ram_ptr(); only emits a trace event for @addr. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2806 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2808 RAMBlock *block;
2809 uint8_t *host = ptr;
2811 if (xen_enabled()) {
2812 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2813 return 0;
2816 QLIST_FOREACH(block, &ram_list.blocks, next) {
2817 /* This case append when the block is not mapped. */
2818 if (block->host == NULL) {
2819 continue;
2821 if (host - block->host < block->length) {
2822 *ram_addr = block->offset + (host - block->host);
2823 return 0;
2827 return -1;
2830 /* Some of the softmmu routines need to translate from a host pointer
2831 (typically a TLB entry) back to a ram offset. */
2832 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2834 ram_addr_t ram_addr;
2836 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2837 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2838 abort();
2840 return ram_addr;
2843 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2844 unsigned size)
2846 #ifdef DEBUG_UNASSIGNED
2847 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2848 #endif
2849 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2850 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2851 #endif
2852 return 0;
2855 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2856 uint64_t val, unsigned size)
2858 #ifdef DEBUG_UNASSIGNED
2859 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2860 #endif
2861 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2862 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2863 #endif
2866 static const MemoryRegionOps unassigned_mem_ops = {
2867 .read = unassigned_mem_read,
2868 .write = unassigned_mem_write,
2869 .endianness = DEVICE_NATIVE_ENDIAN,
2872 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2873 unsigned size)
2875 abort();
2878 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2879 uint64_t value, unsigned size)
2881 abort();
2884 static const MemoryRegionOps error_mem_ops = {
2885 .read = error_mem_read,
2886 .write = error_mem_write,
2887 .endianness = DEVICE_NATIVE_ENDIAN,
2890 static const MemoryRegionOps rom_mem_ops = {
2891 .read = error_mem_read,
2892 .write = unassigned_mem_write,
2893 .endianness = DEVICE_NATIVE_ENDIAN,
2896 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2897 uint64_t val, unsigned size)
2899 int dirty_flags;
2900 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2901 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2902 #if !defined(CONFIG_USER_ONLY)
2903 tb_invalidate_phys_page_fast(ram_addr, size);
2904 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2905 #endif
2907 switch (size) {
2908 case 1:
2909 stb_p(qemu_get_ram_ptr(ram_addr), val);
2910 break;
2911 case 2:
2912 stw_p(qemu_get_ram_ptr(ram_addr), val);
2913 break;
2914 case 4:
2915 stl_p(qemu_get_ram_ptr(ram_addr), val);
2916 break;
2917 default:
2918 abort();
2920 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2921 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2922 /* we remove the notdirty callback only if the code has been
2923 flushed */
2924 if (dirty_flags == 0xff)
2925 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2928 static const MemoryRegionOps notdirty_mem_ops = {
2929 .read = error_mem_read,
2930 .write = notdirty_mem_write,
2931 .endianness = DEVICE_NATIVE_ENDIAN,
2934 /* Generate a debug exception if a watchpoint has been hit. */
2935 static void check_watchpoint(int offset, int len_mask, int flags)
2937 CPUArchState *env = cpu_single_env;
2938 target_ulong pc, cs_base;
2939 TranslationBlock *tb;
2940 target_ulong vaddr;
2941 CPUWatchpoint *wp;
2942 int cpu_flags;
2944 if (env->watchpoint_hit) {
2945 /* We re-entered the check after replacing the TB. Now raise
2946 * the debug interrupt so that is will trigger after the
2947 * current instruction. */
2948 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2949 return;
2951 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2952 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2953 if ((vaddr == (wp->vaddr & len_mask) ||
2954 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2955 wp->flags |= BP_WATCHPOINT_HIT;
2956 if (!env->watchpoint_hit) {
2957 env->watchpoint_hit = wp;
2958 tb = tb_find_pc(env->mem_io_pc);
2959 if (!tb) {
2960 cpu_abort(env, "check_watchpoint: could not find TB for "
2961 "pc=%p", (void *)env->mem_io_pc);
2963 cpu_restore_state(tb, env, env->mem_io_pc);
2964 tb_phys_invalidate(tb, -1);
2965 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2966 env->exception_index = EXCP_DEBUG;
2967 cpu_loop_exit(env);
2968 } else {
2969 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2970 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2971 cpu_resume_from_signal(env, NULL);
2974 } else {
2975 wp->flags &= ~BP_WATCHPOINT_HIT;
2980 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2981 so these check for a hit then pass through to the normal out-of-line
2982 phys routines. */
2983 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2984 unsigned size)
2986 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2987 switch (size) {
2988 case 1: return ldub_phys(addr);
2989 case 2: return lduw_phys(addr);
2990 case 4: return ldl_phys(addr);
2991 default: abort();
2995 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
2996 uint64_t val, unsigned size)
2998 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2999 switch (size) {
3000 case 1:
3001 stb_phys(addr, val);
3002 break;
3003 case 2:
3004 stw_phys(addr, val);
3005 break;
3006 case 4:
3007 stl_phys(addr, val);
3008 break;
3009 default: abort();
3013 static const MemoryRegionOps watch_mem_ops = {
3014 .read = watch_mem_read,
3015 .write = watch_mem_write,
3016 .endianness = DEVICE_NATIVE_ENDIAN,
3019 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3020 unsigned len)
3022 subpage_t *mmio = opaque;
3023 unsigned int idx = SUBPAGE_IDX(addr);
3024 MemoryRegionSection *section;
3025 #if defined(DEBUG_SUBPAGE)
3026 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3027 mmio, len, addr, idx);
3028 #endif
3030 section = &phys_sections[mmio->sub_section[idx]];
3031 addr += mmio->base;
3032 addr -= section->offset_within_address_space;
3033 addr += section->offset_within_region;
3034 return io_mem_read(section->mr, addr, len);
3037 static void subpage_write(void *opaque, target_phys_addr_t addr,
3038 uint64_t value, unsigned len)
3040 subpage_t *mmio = opaque;
3041 unsigned int idx = SUBPAGE_IDX(addr);
3042 MemoryRegionSection *section;
3043 #if defined(DEBUG_SUBPAGE)
3044 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3045 " idx %d value %"PRIx64"\n",
3046 __func__, mmio, len, addr, idx, value);
3047 #endif
3049 section = &phys_sections[mmio->sub_section[idx]];
3050 addr += mmio->base;
3051 addr -= section->offset_within_address_space;
3052 addr += section->offset_within_region;
3053 io_mem_write(section->mr, addr, value, len);
3056 static const MemoryRegionOps subpage_ops = {
3057 .read = subpage_read,
3058 .write = subpage_write,
3059 .endianness = DEVICE_NATIVE_ENDIAN,
3062 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3063 unsigned size)
3065 ram_addr_t raddr = addr;
3066 void *ptr = qemu_get_ram_ptr(raddr);
3067 switch (size) {
3068 case 1: return ldub_p(ptr);
3069 case 2: return lduw_p(ptr);
3070 case 4: return ldl_p(ptr);
3071 default: abort();
3075 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3076 uint64_t value, unsigned size)
3078 ram_addr_t raddr = addr;
3079 void *ptr = qemu_get_ram_ptr(raddr);
3080 switch (size) {
3081 case 1: return stb_p(ptr, value);
3082 case 2: return stw_p(ptr, value);
3083 case 4: return stl_p(ptr, value);
3084 default: abort();
3088 static const MemoryRegionOps subpage_ram_ops = {
3089 .read = subpage_ram_read,
3090 .write = subpage_ram_write,
3091 .endianness = DEVICE_NATIVE_ENDIAN,
3094 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3095 uint16_t section)
3097 int idx, eidx;
3099 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3100 return -1;
3101 idx = SUBPAGE_IDX(start);
3102 eidx = SUBPAGE_IDX(end);
3103 #if defined(DEBUG_SUBPAGE)
3104 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3105 mmio, start, end, idx, eidx, memory);
3106 #endif
3107 if (memory_region_is_ram(phys_sections[section].mr)) {
3108 MemoryRegionSection new_section = phys_sections[section];
3109 new_section.mr = &io_mem_subpage_ram;
3110 section = phys_section_add(&new_section);
3112 for (; idx <= eidx; idx++) {
3113 mmio->sub_section[idx] = section;
3116 return 0;
3119 static subpage_t *subpage_init(target_phys_addr_t base)
3121 subpage_t *mmio;
3123 mmio = g_malloc0(sizeof(subpage_t));
3125 mmio->base = base;
3126 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3127 "subpage", TARGET_PAGE_SIZE);
3128 mmio->iomem.subpage = true;
3129 #if defined(DEBUG_SUBPAGE)
3130 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3131 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3132 #endif
3133 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3135 return mmio;
3138 static uint16_t dummy_section(MemoryRegion *mr)
3140 MemoryRegionSection section = {
3141 .mr = mr,
3142 .offset_within_address_space = 0,
3143 .offset_within_region = 0,
3144 .size = UINT64_MAX,
3147 return phys_section_add(&section);
3150 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3152 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3155 static void io_mem_init(void)
3157 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3158 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3159 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3160 "unassigned", UINT64_MAX);
3161 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3162 "notdirty", UINT64_MAX);
3163 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3164 "subpage-ram", UINT64_MAX);
3165 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3166 "watch", UINT64_MAX);
3169 static void core_begin(MemoryListener *listener)
3171 destroy_all_mappings();
3172 phys_sections_clear();
3173 phys_map.ptr = PHYS_MAP_NODE_NIL;
3174 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3175 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3176 phys_section_rom = dummy_section(&io_mem_rom);
3177 phys_section_watch = dummy_section(&io_mem_watch);
3180 static void core_commit(MemoryListener *listener)
3182 CPUArchState *env;
3184 /* since each CPU stores ram addresses in its TLB cache, we must
3185 reset the modified entries */
3186 /* XXX: slow ! */
3187 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3188 tlb_flush(env, 1);
3192 static void core_region_add(MemoryListener *listener,
3193 MemoryRegionSection *section)
3195 cpu_register_physical_memory_log(section, section->readonly);
3198 static void core_region_del(MemoryListener *listener,
3199 MemoryRegionSection *section)
3203 static void core_region_nop(MemoryListener *listener,
3204 MemoryRegionSection *section)
3206 cpu_register_physical_memory_log(section, section->readonly);
3209 static void core_log_start(MemoryListener *listener,
3210 MemoryRegionSection *section)
3214 static void core_log_stop(MemoryListener *listener,
3215 MemoryRegionSection *section)
3219 static void core_log_sync(MemoryListener *listener,
3220 MemoryRegionSection *section)
3224 static void core_log_global_start(MemoryListener *listener)
3226 cpu_physical_memory_set_dirty_tracking(1);
3229 static void core_log_global_stop(MemoryListener *listener)
3231 cpu_physical_memory_set_dirty_tracking(0);
3234 static void core_eventfd_add(MemoryListener *listener,
3235 MemoryRegionSection *section,
3236 bool match_data, uint64_t data, EventNotifier *e)
3240 static void core_eventfd_del(MemoryListener *listener,
3241 MemoryRegionSection *section,
3242 bool match_data, uint64_t data, EventNotifier *e)
3246 static void io_begin(MemoryListener *listener)
3250 static void io_commit(MemoryListener *listener)
3254 static void io_region_add(MemoryListener *listener,
3255 MemoryRegionSection *section)
3257 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3259 mrio->mr = section->mr;
3260 mrio->offset = section->offset_within_region;
3261 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3262 section->offset_within_address_space, section->size);
3263 ioport_register(&mrio->iorange);
3266 static void io_region_del(MemoryListener *listener,
3267 MemoryRegionSection *section)
3269 isa_unassign_ioport(section->offset_within_address_space, section->size);
3272 static void io_region_nop(MemoryListener *listener,
3273 MemoryRegionSection *section)
3277 static void io_log_start(MemoryListener *listener,
3278 MemoryRegionSection *section)
3282 static void io_log_stop(MemoryListener *listener,
3283 MemoryRegionSection *section)
3287 static void io_log_sync(MemoryListener *listener,
3288 MemoryRegionSection *section)
3292 static void io_log_global_start(MemoryListener *listener)
3296 static void io_log_global_stop(MemoryListener *listener)
3300 static void io_eventfd_add(MemoryListener *listener,
3301 MemoryRegionSection *section,
3302 bool match_data, uint64_t data, EventNotifier *e)
3306 static void io_eventfd_del(MemoryListener *listener,
3307 MemoryRegionSection *section,
3308 bool match_data, uint64_t data, EventNotifier *e)
3312 static MemoryListener core_memory_listener = {
3313 .begin = core_begin,
3314 .commit = core_commit,
3315 .region_add = core_region_add,
3316 .region_del = core_region_del,
3317 .region_nop = core_region_nop,
3318 .log_start = core_log_start,
3319 .log_stop = core_log_stop,
3320 .log_sync = core_log_sync,
3321 .log_global_start = core_log_global_start,
3322 .log_global_stop = core_log_global_stop,
3323 .eventfd_add = core_eventfd_add,
3324 .eventfd_del = core_eventfd_del,
3325 .priority = 0,
3328 static MemoryListener io_memory_listener = {
3329 .begin = io_begin,
3330 .commit = io_commit,
3331 .region_add = io_region_add,
3332 .region_del = io_region_del,
3333 .region_nop = io_region_nop,
3334 .log_start = io_log_start,
3335 .log_stop = io_log_stop,
3336 .log_sync = io_log_sync,
3337 .log_global_start = io_log_global_start,
3338 .log_global_stop = io_log_global_stop,
3339 .eventfd_add = io_eventfd_add,
3340 .eventfd_del = io_eventfd_del,
3341 .priority = 0,
3344 static void memory_map_init(void)
3346 system_memory = g_malloc(sizeof(*system_memory));
3347 memory_region_init(system_memory, "system", INT64_MAX);
3348 set_system_memory_map(system_memory);
3350 system_io = g_malloc(sizeof(*system_io));
3351 memory_region_init(system_io, "io", 65536);
3352 set_system_io_map(system_io);
3354 memory_listener_register(&core_memory_listener, system_memory);
3355 memory_listener_register(&io_memory_listener, system_io);
3358 MemoryRegion *get_system_memory(void)
3360 return system_memory;
3363 MemoryRegion *get_system_io(void)
3365 return system_io;
3368 #endif /* !defined(CONFIG_USER_ONLY) */
3370 /* physical memory access (slow version, mainly for debug) */
3371 #if defined(CONFIG_USER_ONLY)
3372 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3373 uint8_t *buf, int len, int is_write)
3375 int l, flags;
3376 target_ulong page;
3377 void * p;
3379 while (len > 0) {
3380 page = addr & TARGET_PAGE_MASK;
3381 l = (page + TARGET_PAGE_SIZE) - addr;
3382 if (l > len)
3383 l = len;
3384 flags = page_get_flags(page);
3385 if (!(flags & PAGE_VALID))
3386 return -1;
3387 if (is_write) {
3388 if (!(flags & PAGE_WRITE))
3389 return -1;
3390 /* XXX: this code should not depend on lock_user */
3391 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3392 return -1;
3393 memcpy(p, buf, l);
3394 unlock_user(p, addr, l);
3395 } else {
3396 if (!(flags & PAGE_READ))
3397 return -1;
3398 /* XXX: this code should not depend on lock_user */
3399 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3400 return -1;
3401 memcpy(buf, p, l);
3402 unlock_user(p, addr, 0);
3404 len -= l;
3405 buf += l;
3406 addr += l;
3408 return 0;
3411 #else
3413 static void invalidate_and_set_dirty(target_phys_addr_t addr,
3414 target_phys_addr_t length)
3416 if (!cpu_physical_memory_is_dirty(addr)) {
3417 /* invalidate code */
3418 tb_invalidate_phys_page_range(addr, addr + length, 0);
3419 /* set dirty bit */
3420 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3422 xen_modified_memory(addr, length);
3425 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3426 int len, int is_write)
3428 int l;
3429 uint8_t *ptr;
3430 uint32_t val;
3431 target_phys_addr_t page;
3432 MemoryRegionSection *section;
3434 while (len > 0) {
3435 page = addr & TARGET_PAGE_MASK;
3436 l = (page + TARGET_PAGE_SIZE) - addr;
3437 if (l > len)
3438 l = len;
3439 section = phys_page_find(page >> TARGET_PAGE_BITS);
3441 if (is_write) {
3442 if (!memory_region_is_ram(section->mr)) {
3443 target_phys_addr_t addr1;
3444 addr1 = memory_region_section_addr(section, addr);
3445 /* XXX: could force cpu_single_env to NULL to avoid
3446 potential bugs */
3447 if (l >= 4 && ((addr1 & 3) == 0)) {
3448 /* 32 bit write access */
3449 val = ldl_p(buf);
3450 io_mem_write(section->mr, addr1, val, 4);
3451 l = 4;
3452 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3453 /* 16 bit write access */
3454 val = lduw_p(buf);
3455 io_mem_write(section->mr, addr1, val, 2);
3456 l = 2;
3457 } else {
3458 /* 8 bit write access */
3459 val = ldub_p(buf);
3460 io_mem_write(section->mr, addr1, val, 1);
3461 l = 1;
3463 } else if (!section->readonly) {
3464 ram_addr_t addr1;
3465 addr1 = memory_region_get_ram_addr(section->mr)
3466 + memory_region_section_addr(section, addr);
3467 /* RAM case */
3468 ptr = qemu_get_ram_ptr(addr1);
3469 memcpy(ptr, buf, l);
3470 invalidate_and_set_dirty(addr1, l);
3471 qemu_put_ram_ptr(ptr);
3473 } else {
3474 if (!(memory_region_is_ram(section->mr) ||
3475 memory_region_is_romd(section->mr))) {
3476 target_phys_addr_t addr1;
3477 /* I/O case */
3478 addr1 = memory_region_section_addr(section, addr);
3479 if (l >= 4 && ((addr1 & 3) == 0)) {
3480 /* 32 bit read access */
3481 val = io_mem_read(section->mr, addr1, 4);
3482 stl_p(buf, val);
3483 l = 4;
3484 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3485 /* 16 bit read access */
3486 val = io_mem_read(section->mr, addr1, 2);
3487 stw_p(buf, val);
3488 l = 2;
3489 } else {
3490 /* 8 bit read access */
3491 val = io_mem_read(section->mr, addr1, 1);
3492 stb_p(buf, val);
3493 l = 1;
3495 } else {
3496 /* RAM case */
3497 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3498 + memory_region_section_addr(section,
3499 addr));
3500 memcpy(buf, ptr, l);
3501 qemu_put_ram_ptr(ptr);
3504 len -= l;
3505 buf += l;
3506 addr += l;
3510 /* used for ROM loading : can write in RAM and ROM */
3511 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3512 const uint8_t *buf, int len)
3514 int l;
3515 uint8_t *ptr;
3516 target_phys_addr_t page;
3517 MemoryRegionSection *section;
3519 while (len > 0) {
3520 page = addr & TARGET_PAGE_MASK;
3521 l = (page + TARGET_PAGE_SIZE) - addr;
3522 if (l > len)
3523 l = len;
3524 section = phys_page_find(page >> TARGET_PAGE_BITS);
3526 if (!(memory_region_is_ram(section->mr) ||
3527 memory_region_is_romd(section->mr))) {
3528 /* do nothing */
3529 } else {
3530 unsigned long addr1;
3531 addr1 = memory_region_get_ram_addr(section->mr)
3532 + memory_region_section_addr(section, addr);
3533 /* ROM/RAM case */
3534 ptr = qemu_get_ram_ptr(addr1);
3535 memcpy(ptr, buf, l);
3536 invalidate_and_set_dirty(addr1, l);
3537 qemu_put_ram_ptr(ptr);
3539 len -= l;
3540 buf += l;
3541 addr += l;
3545 typedef struct {
3546 void *buffer;
3547 target_phys_addr_t addr;
3548 target_phys_addr_t len;
3549 } BounceBuffer;
3551 static BounceBuffer bounce;
3553 typedef struct MapClient {
3554 void *opaque;
3555 void (*callback)(void *opaque);
3556 QLIST_ENTRY(MapClient) link;
3557 } MapClient;
3559 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3560 = QLIST_HEAD_INITIALIZER(map_client_list);
3562 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3564 MapClient *client = g_malloc(sizeof(*client));
3566 client->opaque = opaque;
3567 client->callback = callback;
3568 QLIST_INSERT_HEAD(&map_client_list, client, link);
3569 return client;
3572 void cpu_unregister_map_client(void *_client)
3574 MapClient *client = (MapClient *)_client;
3576 QLIST_REMOVE(client, link);
3577 g_free(client);
3580 static void cpu_notify_map_clients(void)
3582 MapClient *client;
3584 while (!QLIST_EMPTY(&map_client_list)) {
3585 client = QLIST_FIRST(&map_client_list);
3586 client->callback(client->opaque);
3587 cpu_unregister_map_client(client);
3591 /* Map a physical memory region into a host virtual address.
3592 * May map a subset of the requested range, given by and returned in *plen.
3593 * May return NULL if resources needed to perform the mapping are exhausted.
3594 * Use only for reads OR writes - not for read-modify-write operations.
3595 * Use cpu_register_map_client() to know when retrying the map operation is
3596 * likely to succeed.
3598 void *cpu_physical_memory_map(target_phys_addr_t addr,
3599 target_phys_addr_t *plen,
3600 int is_write)
3602 target_phys_addr_t len = *plen;
3603 target_phys_addr_t todo = 0;
3604 int l;
3605 target_phys_addr_t page;
3606 MemoryRegionSection *section;
3607 ram_addr_t raddr = RAM_ADDR_MAX;
3608 ram_addr_t rlen;
3609 void *ret;
3611 while (len > 0) {
3612 page = addr & TARGET_PAGE_MASK;
3613 l = (page + TARGET_PAGE_SIZE) - addr;
3614 if (l > len)
3615 l = len;
3616 section = phys_page_find(page >> TARGET_PAGE_BITS);
3618 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3619 if (todo || bounce.buffer) {
3620 break;
3622 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3623 bounce.addr = addr;
3624 bounce.len = l;
3625 if (!is_write) {
3626 cpu_physical_memory_read(addr, bounce.buffer, l);
3629 *plen = l;
3630 return bounce.buffer;
3632 if (!todo) {
3633 raddr = memory_region_get_ram_addr(section->mr)
3634 + memory_region_section_addr(section, addr);
3637 len -= l;
3638 addr += l;
3639 todo += l;
3641 rlen = todo;
3642 ret = qemu_ram_ptr_length(raddr, &rlen);
3643 *plen = rlen;
3644 return ret;
3647 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3648 * Will also mark the memory as dirty if is_write == 1. access_len gives
3649 * the amount of memory that was actually read or written by the caller.
3651 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3652 int is_write, target_phys_addr_t access_len)
3654 if (buffer != bounce.buffer) {
3655 if (is_write) {
3656 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3657 while (access_len) {
3658 unsigned l;
3659 l = TARGET_PAGE_SIZE;
3660 if (l > access_len)
3661 l = access_len;
3662 invalidate_and_set_dirty(addr1, l);
3663 addr1 += l;
3664 access_len -= l;
3667 if (xen_enabled()) {
3668 xen_invalidate_map_cache_entry(buffer);
3670 return;
3672 if (is_write) {
3673 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3675 qemu_vfree(bounce.buffer);
3676 bounce.buffer = NULL;
3677 cpu_notify_map_clients();
3680 /* warning: addr must be aligned */
3681 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3682 enum device_endian endian)
3684 uint8_t *ptr;
3685 uint32_t val;
3686 MemoryRegionSection *section;
3688 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3690 if (!(memory_region_is_ram(section->mr) ||
3691 memory_region_is_romd(section->mr))) {
3692 /* I/O case */
3693 addr = memory_region_section_addr(section, addr);
3694 val = io_mem_read(section->mr, addr, 4);
3695 #if defined(TARGET_WORDS_BIGENDIAN)
3696 if (endian == DEVICE_LITTLE_ENDIAN) {
3697 val = bswap32(val);
3699 #else
3700 if (endian == DEVICE_BIG_ENDIAN) {
3701 val = bswap32(val);
3703 #endif
3704 } else {
3705 /* RAM case */
3706 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3707 & TARGET_PAGE_MASK)
3708 + memory_region_section_addr(section, addr));
3709 switch (endian) {
3710 case DEVICE_LITTLE_ENDIAN:
3711 val = ldl_le_p(ptr);
3712 break;
3713 case DEVICE_BIG_ENDIAN:
3714 val = ldl_be_p(ptr);
3715 break;
3716 default:
3717 val = ldl_p(ptr);
3718 break;
3721 return val;
3724 uint32_t ldl_phys(target_phys_addr_t addr)
3726 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3729 uint32_t ldl_le_phys(target_phys_addr_t addr)
3731 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3734 uint32_t ldl_be_phys(target_phys_addr_t addr)
3736 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3739 /* warning: addr must be aligned */
3740 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3741 enum device_endian endian)
3743 uint8_t *ptr;
3744 uint64_t val;
3745 MemoryRegionSection *section;
3747 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3749 if (!(memory_region_is_ram(section->mr) ||
3750 memory_region_is_romd(section->mr))) {
3751 /* I/O case */
3752 addr = memory_region_section_addr(section, addr);
3754 /* XXX This is broken when device endian != cpu endian.
3755 Fix and add "endian" variable check */
3756 #ifdef TARGET_WORDS_BIGENDIAN
3757 val = io_mem_read(section->mr, addr, 4) << 32;
3758 val |= io_mem_read(section->mr, addr + 4, 4);
3759 #else
3760 val = io_mem_read(section->mr, addr, 4);
3761 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3762 #endif
3763 } else {
3764 /* RAM case */
3765 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3766 & TARGET_PAGE_MASK)
3767 + memory_region_section_addr(section, addr));
3768 switch (endian) {
3769 case DEVICE_LITTLE_ENDIAN:
3770 val = ldq_le_p(ptr);
3771 break;
3772 case DEVICE_BIG_ENDIAN:
3773 val = ldq_be_p(ptr);
3774 break;
3775 default:
3776 val = ldq_p(ptr);
3777 break;
3780 return val;
3783 uint64_t ldq_phys(target_phys_addr_t addr)
3785 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3788 uint64_t ldq_le_phys(target_phys_addr_t addr)
3790 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3793 uint64_t ldq_be_phys(target_phys_addr_t addr)
3795 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3798 /* XXX: optimize */
3799 uint32_t ldub_phys(target_phys_addr_t addr)
3801 uint8_t val;
3802 cpu_physical_memory_read(addr, &val, 1);
3803 return val;
3806 /* warning: addr must be aligned */
3807 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3808 enum device_endian endian)
3810 uint8_t *ptr;
3811 uint64_t val;
3812 MemoryRegionSection *section;
3814 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3816 if (!(memory_region_is_ram(section->mr) ||
3817 memory_region_is_romd(section->mr))) {
3818 /* I/O case */
3819 addr = memory_region_section_addr(section, addr);
3820 val = io_mem_read(section->mr, addr, 2);
3821 #if defined(TARGET_WORDS_BIGENDIAN)
3822 if (endian == DEVICE_LITTLE_ENDIAN) {
3823 val = bswap16(val);
3825 #else
3826 if (endian == DEVICE_BIG_ENDIAN) {
3827 val = bswap16(val);
3829 #endif
3830 } else {
3831 /* RAM case */
3832 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3833 & TARGET_PAGE_MASK)
3834 + memory_region_section_addr(section, addr));
3835 switch (endian) {
3836 case DEVICE_LITTLE_ENDIAN:
3837 val = lduw_le_p(ptr);
3838 break;
3839 case DEVICE_BIG_ENDIAN:
3840 val = lduw_be_p(ptr);
3841 break;
3842 default:
3843 val = lduw_p(ptr);
3844 break;
3847 return val;
3850 uint32_t lduw_phys(target_phys_addr_t addr)
3852 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3855 uint32_t lduw_le_phys(target_phys_addr_t addr)
3857 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3860 uint32_t lduw_be_phys(target_phys_addr_t addr)
3862 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3865 /* warning: addr must be aligned. The ram page is not masked as dirty
3866 and the code inside is not invalidated. It is useful if the dirty
3867 bits are used to track modified PTEs */
3868 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3870 uint8_t *ptr;
3871 MemoryRegionSection *section;
3873 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3875 if (!memory_region_is_ram(section->mr) || section->readonly) {
3876 addr = memory_region_section_addr(section, addr);
3877 if (memory_region_is_ram(section->mr)) {
3878 section = &phys_sections[phys_section_rom];
3880 io_mem_write(section->mr, addr, val, 4);
3881 } else {
3882 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3883 & TARGET_PAGE_MASK)
3884 + memory_region_section_addr(section, addr);
3885 ptr = qemu_get_ram_ptr(addr1);
3886 stl_p(ptr, val);
3888 if (unlikely(in_migration)) {
3889 if (!cpu_physical_memory_is_dirty(addr1)) {
3890 /* invalidate code */
3891 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3892 /* set dirty bit */
3893 cpu_physical_memory_set_dirty_flags(
3894 addr1, (0xff & ~CODE_DIRTY_FLAG));
3900 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3902 uint8_t *ptr;
3903 MemoryRegionSection *section;
3905 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3907 if (!memory_region_is_ram(section->mr) || section->readonly) {
3908 addr = memory_region_section_addr(section, addr);
3909 if (memory_region_is_ram(section->mr)) {
3910 section = &phys_sections[phys_section_rom];
3912 #ifdef TARGET_WORDS_BIGENDIAN
3913 io_mem_write(section->mr, addr, val >> 32, 4);
3914 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3915 #else
3916 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3917 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3918 #endif
3919 } else {
3920 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3921 & TARGET_PAGE_MASK)
3922 + memory_region_section_addr(section, addr));
3923 stq_p(ptr, val);
3927 /* warning: addr must be aligned */
3928 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
3929 enum device_endian endian)
3931 uint8_t *ptr;
3932 MemoryRegionSection *section;
3934 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3936 if (!memory_region_is_ram(section->mr) || section->readonly) {
3937 addr = memory_region_section_addr(section, addr);
3938 if (memory_region_is_ram(section->mr)) {
3939 section = &phys_sections[phys_section_rom];
3941 #if defined(TARGET_WORDS_BIGENDIAN)
3942 if (endian == DEVICE_LITTLE_ENDIAN) {
3943 val = bswap32(val);
3945 #else
3946 if (endian == DEVICE_BIG_ENDIAN) {
3947 val = bswap32(val);
3949 #endif
3950 io_mem_write(section->mr, addr, val, 4);
3951 } else {
3952 unsigned long addr1;
3953 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3954 + memory_region_section_addr(section, addr);
3955 /* RAM case */
3956 ptr = qemu_get_ram_ptr(addr1);
3957 switch (endian) {
3958 case DEVICE_LITTLE_ENDIAN:
3959 stl_le_p(ptr, val);
3960 break;
3961 case DEVICE_BIG_ENDIAN:
3962 stl_be_p(ptr, val);
3963 break;
3964 default:
3965 stl_p(ptr, val);
3966 break;
3968 invalidate_and_set_dirty(addr1, 4);
3972 void stl_phys(target_phys_addr_t addr, uint32_t val)
3974 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3977 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
3979 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3982 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
3984 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3987 /* XXX: optimize */
3988 void stb_phys(target_phys_addr_t addr, uint32_t val)
3990 uint8_t v = val;
3991 cpu_physical_memory_write(addr, &v, 1);
3994 /* warning: addr must be aligned */
3995 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
3996 enum device_endian endian)
3998 uint8_t *ptr;
3999 MemoryRegionSection *section;
4001 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4003 if (!memory_region_is_ram(section->mr) || section->readonly) {
4004 addr = memory_region_section_addr(section, addr);
4005 if (memory_region_is_ram(section->mr)) {
4006 section = &phys_sections[phys_section_rom];
4008 #if defined(TARGET_WORDS_BIGENDIAN)
4009 if (endian == DEVICE_LITTLE_ENDIAN) {
4010 val = bswap16(val);
4012 #else
4013 if (endian == DEVICE_BIG_ENDIAN) {
4014 val = bswap16(val);
4016 #endif
4017 io_mem_write(section->mr, addr, val, 2);
4018 } else {
4019 unsigned long addr1;
4020 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4021 + memory_region_section_addr(section, addr);
4022 /* RAM case */
4023 ptr = qemu_get_ram_ptr(addr1);
4024 switch (endian) {
4025 case DEVICE_LITTLE_ENDIAN:
4026 stw_le_p(ptr, val);
4027 break;
4028 case DEVICE_BIG_ENDIAN:
4029 stw_be_p(ptr, val);
4030 break;
4031 default:
4032 stw_p(ptr, val);
4033 break;
4035 invalidate_and_set_dirty(addr1, 2);
4039 void stw_phys(target_phys_addr_t addr, uint32_t val)
4041 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4044 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4046 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4049 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4051 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4054 /* XXX: optimize */
4055 void stq_phys(target_phys_addr_t addr, uint64_t val)
4057 val = tswap64(val);
4058 cpu_physical_memory_write(addr, &val, 8);
4061 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4063 val = cpu_to_le64(val);
4064 cpu_physical_memory_write(addr, &val, 8);
4067 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4069 val = cpu_to_be64(val);
4070 cpu_physical_memory_write(addr, &val, 8);
4073 /* virtual memory access for debug (includes writing to ROM) */
4074 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4075 uint8_t *buf, int len, int is_write)
4077 int l;
4078 target_phys_addr_t phys_addr;
4079 target_ulong page;
4081 while (len > 0) {
4082 page = addr & TARGET_PAGE_MASK;
4083 phys_addr = cpu_get_phys_page_debug(env, page);
4084 /* if no physical page mapped, return an error */
4085 if (phys_addr == -1)
4086 return -1;
4087 l = (page + TARGET_PAGE_SIZE) - addr;
4088 if (l > len)
4089 l = len;
4090 phys_addr += (addr & ~TARGET_PAGE_MASK);
4091 if (is_write)
4092 cpu_physical_memory_write_rom(phys_addr, buf, l);
4093 else
4094 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4095 len -= l;
4096 buf += l;
4097 addr += l;
4099 return 0;
4101 #endif
4103 /* in deterministic execution mode, instructions doing device I/Os
4104 must be at the end of the TB */
4105 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4107 TranslationBlock *tb;
4108 uint32_t n, cflags;
4109 target_ulong pc, cs_base;
4110 uint64_t flags;
4112 tb = tb_find_pc(retaddr);
4113 if (!tb) {
4114 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4115 (void *)retaddr);
4117 n = env->icount_decr.u16.low + tb->icount;
4118 cpu_restore_state(tb, env, retaddr);
4119 /* Calculate how many instructions had been executed before the fault
4120 occurred. */
4121 n = n - env->icount_decr.u16.low;
4122 /* Generate a new TB ending on the I/O insn. */
4123 n++;
4124 /* On MIPS and SH, delay slot instructions can only be restarted if
4125 they were already the first instruction in the TB. If this is not
4126 the first instruction in a TB then re-execute the preceding
4127 branch. */
4128 #if defined(TARGET_MIPS)
4129 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4130 env->active_tc.PC -= 4;
4131 env->icount_decr.u16.low++;
4132 env->hflags &= ~MIPS_HFLAG_BMASK;
4134 #elif defined(TARGET_SH4)
4135 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4136 && n > 1) {
4137 env->pc -= 2;
4138 env->icount_decr.u16.low++;
4139 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4141 #endif
4142 /* This should never happen. */
4143 if (n > CF_COUNT_MASK)
4144 cpu_abort(env, "TB too big during recompile");
4146 cflags = n | CF_LAST_IO;
4147 pc = tb->pc;
4148 cs_base = tb->cs_base;
4149 flags = tb->flags;
4150 tb_phys_invalidate(tb, -1);
4151 /* FIXME: In theory this could raise an exception. In practice
4152 we have already translated the block once so it's probably ok. */
4153 tb_gen_code(env, pc, cs_base, flags, cflags);
4154 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4155 the first in the TB) then we end up generating a whole new TB and
4156 repeating the fault, which is horribly inefficient.
4157 Better would be to execute just this insn uncached, or generate a
4158 second new TB. */
4159 cpu_resume_from_signal(env, NULL);
4162 #if !defined(CONFIG_USER_ONLY)
4164 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4166 int i, target_code_size, max_target_code_size;
4167 int direct_jmp_count, direct_jmp2_count, cross_page;
4168 TranslationBlock *tb;
4170 target_code_size = 0;
4171 max_target_code_size = 0;
4172 cross_page = 0;
4173 direct_jmp_count = 0;
4174 direct_jmp2_count = 0;
4175 for(i = 0; i < nb_tbs; i++) {
4176 tb = &tbs[i];
4177 target_code_size += tb->size;
4178 if (tb->size > max_target_code_size)
4179 max_target_code_size = tb->size;
4180 if (tb->page_addr[1] != -1)
4181 cross_page++;
4182 if (tb->tb_next_offset[0] != 0xffff) {
4183 direct_jmp_count++;
4184 if (tb->tb_next_offset[1] != 0xffff) {
4185 direct_jmp2_count++;
4189 /* XXX: avoid using doubles ? */
4190 cpu_fprintf(f, "Translation buffer state:\n");
4191 cpu_fprintf(f, "gen code size %td/%ld\n",
4192 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4193 cpu_fprintf(f, "TB count %d/%d\n",
4194 nb_tbs, code_gen_max_blocks);
4195 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4196 nb_tbs ? target_code_size / nb_tbs : 0,
4197 max_target_code_size);
4198 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4199 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4200 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4201 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4202 cross_page,
4203 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4204 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4205 direct_jmp_count,
4206 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4207 direct_jmp2_count,
4208 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4209 cpu_fprintf(f, "\nStatistics:\n");
4210 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4211 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4212 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4213 tcg_dump_info(f, cpu_fprintf);
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time, false
 * otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
4230 #endif
4232 #ifndef CONFIG_USER_ONLY
4233 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4235 MemoryRegionSection *section;
4237 section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
4239 return !(memory_region_is_ram(section->mr) ||
4240 memory_region_is_romd(section->mr));
4242 #endif