[qemu/agraf.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
122 static MemoryRegion io_mem_subpage_ram;
124 #endif
126 CPUArchState *first_cpu;
127 /* current CPU in the current thread. It is only valid inside
128 cpu_exec() */
129 DEFINE_TLS(CPUArchState *,cpu_single_env);
130 /* 0 = Do not count executed instructions.
131 1 = Precise instruction counting.
132 2 = Adaptive rate instruction counting. */
133 int use_icount = 0;
135 typedef struct PageDesc {
136 /* list of TBs intersecting this ram page */
137 TranslationBlock *first_tb;
138 /* to optimize handling of self-modifying code, we count the write
139 invalidations of a given page; past a threshold we build a bitmap */
140 unsigned int code_write_count;
141 uint8_t *code_bitmap;
142 #if defined(CONFIG_USER_ONLY)
143 unsigned long flags;
144 #endif
145 } PageDesc;
147 /* In system mode we want L1_MAP to be based on ram offsets,
148 while in user mode we want it to be based on virtual addresses. */
149 #if !defined(CONFIG_USER_ONLY)
150 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
151 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
154 #endif
155 #else
156 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
157 #endif
159 /* Size of the L2 (and L3, etc) page tables. */
160 #define L2_BITS 10
161 #define L2_SIZE (1 << L2_BITS)
163 #define P_L2_LEVELS \
164 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
166 /* The bits remaining after N lower levels of page tables. */
167 #define V_L1_BITS_REM \
168 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
178 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
180 unsigned long qemu_real_host_page_size;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageEntry PhysPageEntry;
191 static MemoryRegionSection *phys_sections;
192 static unsigned phys_sections_nb, phys_sections_nb_alloc;
193 static uint16_t phys_section_unassigned;
194 static uint16_t phys_section_notdirty;
195 static uint16_t phys_section_rom;
196 static uint16_t phys_section_watch;
198 struct PhysPageEntry {
199 uint16_t is_leaf : 1;
200 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
201 uint16_t ptr : 15;
204 /* Simple allocator for PhysPageEntry nodes */
205 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
206 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
208 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
210 /* This is a multi-level map on the physical address space.
211 The bottom level has pointers to MemoryRegionSections. */
212 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
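/* The root starts out empty (ptr == PHYS_MAP_NODE_NIL); interior nodes are
   allocated lazily by phys_page_set_level() the first time a range that
   needs them is registered. */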
214 static void io_mem_init(void);
215 static void memory_map_init(void);
217 static MemoryRegion io_mem_watch;
218 #endif
220 /* log support */
221 #ifdef WIN32
222 static const char *logfilename = "qemu.log";
223 #else
224 static const char *logfilename = "/tmp/qemu.log";
225 #endif
226 FILE *logfile;
227 int loglevel;
228 static int log_append = 0;
230 /* statistics */
231 #if !defined(CONFIG_USER_ONLY)
232 static int tlb_flush_count;
233 #endif
234 static int tb_flush_count;
235 static int tb_phys_invalidate_count;
237 #ifdef _WIN32
238 static void map_exec(void *addr, long size)
240 DWORD old_protect;
241 VirtualProtect(addr, size,
242 PAGE_EXECUTE_READWRITE, &old_protect);
245 #else
246 static void map_exec(void *addr, long size)
248 unsigned long start, end, page_size;
250 page_size = getpagesize();
251 start = (unsigned long)addr;
252 start &= ~(page_size - 1);
254 end = (unsigned long)addr + size;
255 end += page_size - 1;
256 end &= ~(page_size - 1);
258 mprotect((void *)start, end - start,
259 PROT_READ | PROT_WRITE | PROT_EXEC);
261 #endif
263 static void page_init(void)
265 /* NOTE: we can always assume that qemu_host_page_size >=
266 TARGET_PAGE_SIZE */
267 #ifdef _WIN32
269 SYSTEM_INFO system_info;
271 GetSystemInfo(&system_info);
272 qemu_real_host_page_size = system_info.dwPageSize;
274 #else
275 qemu_real_host_page_size = getpagesize();
276 #endif
277 if (qemu_host_page_size == 0)
278 qemu_host_page_size = qemu_real_host_page_size;
279 if (qemu_host_page_size < TARGET_PAGE_SIZE)
280 qemu_host_page_size = TARGET_PAGE_SIZE;
281 qemu_host_page_mask = ~(qemu_host_page_size - 1);
283 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
285 #ifdef HAVE_KINFO_GETVMMAP
286 struct kinfo_vmentry *freep;
287 int i, cnt;
289 freep = kinfo_getvmmap(getpid(), &cnt);
290 if (freep) {
291 mmap_lock();
292 for (i = 0; i < cnt; i++) {
293 unsigned long startaddr, endaddr;
295 startaddr = freep[i].kve_start;
296 endaddr = freep[i].kve_end;
297 if (h2g_valid(startaddr)) {
298 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
300 if (h2g_valid(endaddr)) {
301 endaddr = h2g(endaddr);
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 } else {
304 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
305 endaddr = ~0ul;
306 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
307 #endif
311 free(freep);
312 mmap_unlock();
314 #else
315 FILE *f;
317 last_brk = (unsigned long)sbrk(0);
319 f = fopen("/compat/linux/proc/self/maps", "r");
320 if (f) {
321 mmap_lock();
323 do {
324 unsigned long startaddr, endaddr;
325 int n;
327 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
329 if (n == 2 && h2g_valid(startaddr)) {
330 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
332 if (h2g_valid(endaddr)) {
333 endaddr = h2g(endaddr);
334 } else {
335 endaddr = ~0ul;
337 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
339 } while (!feof(f));
341 fclose(f);
342 mmap_unlock();
344 #endif
346 #endif
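/* Walk (and, if 'alloc' is set, populate) the l1_map radix tree for a page
   index: the top V_L1_BITS select the l1_map slot, each intermediate level
   consumes L2_BITS more bits, and the lowest L2_BITS index into a leaf
   array of PageDesc entries. */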
349 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
351 PageDesc *pd;
352 void **lp;
353 int i;
355 #if defined(CONFIG_USER_ONLY)
356 /* We can't use g_malloc because it may recurse into a locked mutex. */
357 # define ALLOC(P, SIZE) \
358 do { \
359 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
360 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
361 } while (0)
362 #else
363 # define ALLOC(P, SIZE) \
364 do { P = g_malloc0(SIZE); } while (0)
365 #endif
367 /* Level 1. Always allocated. */
368 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
370 /* Level 2..N-1. */
371 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
372 void **p = *lp;
374 if (p == NULL) {
375 if (!alloc) {
376 return NULL;
378 ALLOC(p, sizeof(void *) * L2_SIZE);
379 *lp = p;
382 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
385 pd = *lp;
386 if (pd == NULL) {
387 if (!alloc) {
388 return NULL;
390 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
391 *lp = pd;
394 #undef ALLOC
396 return pd + (index & (L2_SIZE - 1));
399 static inline PageDesc *page_find(tb_page_addr_t index)
401 return page_find_alloc(index, 0);
404 #if !defined(CONFIG_USER_ONLY)
406 static void phys_map_node_reserve(unsigned nodes)
408 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
409 typedef PhysPageEntry Node[L2_SIZE];
410 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
411 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
412 phys_map_nodes_nb + nodes);
413 phys_map_nodes = g_renew(Node, phys_map_nodes,
414 phys_map_nodes_nb_alloc);
418 static uint16_t phys_map_node_alloc(void)
420 unsigned i;
421 uint16_t ret;
423 ret = phys_map_nodes_nb++;
424 assert(ret != PHYS_MAP_NODE_NIL);
425 assert(ret != phys_map_nodes_nb_alloc);
426 for (i = 0; i < L2_SIZE; ++i) {
427 phys_map_nodes[ret][i].is_leaf = 0;
428 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
430 return ret;
433 static void phys_map_nodes_reset(void)
435 phys_map_nodes_nb = 0;
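/* Recursively mark the page-index range [*index, *index + *nb) as pointing
   at section 'leaf'. At each level, a sub-range that is step-aligned and at
   least one step long becomes a single leaf entry covering the whole step;
   anything smaller recurses into the next level down. */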
439 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
440 target_phys_addr_t *nb, uint16_t leaf,
441 int level)
443 PhysPageEntry *p;
444 int i;
445 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
447 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
448 lp->ptr = phys_map_node_alloc();
449 p = phys_map_nodes[lp->ptr];
450 if (level == 0) {
451 for (i = 0; i < L2_SIZE; i++) {
452 p[i].is_leaf = 1;
453 p[i].ptr = phys_section_unassigned;
456 } else {
457 p = phys_map_nodes[lp->ptr];
459 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
461 while (*nb && lp < &p[L2_SIZE]) {
462 if ((*index & (step - 1)) == 0 && *nb >= step) {
463 lp->is_leaf = true;
464 lp->ptr = leaf;
465 *index += step;
466 *nb -= step;
467 } else {
468 phys_page_set_level(lp, index, nb, leaf, level - 1);
470 ++lp;
474 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
475 uint16_t leaf)
477 /* Wildly overreserve - it doesn't matter much. */
478 phys_map_node_reserve(3 * P_L2_LEVELS);
480 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
483 static MemoryRegionSection *phys_page_find(target_phys_addr_t index)
485 PhysPageEntry lp = phys_map;
486 PhysPageEntry *p;
487 int i;
488 uint16_t s_index = phys_section_unassigned;
490 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
491 if (lp.ptr == PHYS_MAP_NODE_NIL) {
492 goto not_found;
494 p = phys_map_nodes[lp.ptr];
495 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
498 s_index = lp.ptr;
499 not_found:
500 return &phys_sections[s_index];
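/* Translate an address-space offset into an offset relative to the start of
   the section's underlying MemoryRegion. */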
503 static target_phys_addr_t section_addr(MemoryRegionSection *section,
504 target_phys_addr_t addr)
506 addr -= section->offset_within_address_space;
507 addr += section->offset_within_region;
508 return addr;
511 static void tlb_protect_code(ram_addr_t ram_addr);
512 static void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
513 target_ulong vaddr);
514 #define mmap_lock() do { } while(0)
515 #define mmap_unlock() do { } while(0)
516 #endif
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
520 #if defined(CONFIG_USER_ONLY)
521 /* Currently it is not recommended to allocate big chunks of data in
522 user mode. This will change when a dedicated libc is used. */
523 #define USE_STATIC_CODE_GEN_BUFFER
524 #endif
526 #ifdef USE_STATIC_CODE_GEN_BUFFER
527 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
528 __attribute__((aligned (CODE_GEN_ALIGN)));
529 #endif
531 static void code_gen_alloc(unsigned long tb_size)
533 #ifdef USE_STATIC_CODE_GEN_BUFFER
534 code_gen_buffer = static_code_gen_buffer;
535 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
536 map_exec(code_gen_buffer, code_gen_buffer_size);
537 #else
538 code_gen_buffer_size = tb_size;
539 if (code_gen_buffer_size == 0) {
540 #if defined(CONFIG_USER_ONLY)
541 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
542 #else
543 /* XXX: needs adjustments */
544 code_gen_buffer_size = (unsigned long)(ram_size / 4);
545 #endif
547 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
548 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
549 /* The code gen buffer location may have constraints depending on
550 the host cpu and OS */
551 #if defined(__linux__)
553 int flags;
554 void *start = NULL;
556 flags = MAP_PRIVATE | MAP_ANONYMOUS;
557 #if defined(__x86_64__)
558 flags |= MAP_32BIT;
559 /* Cannot map more than that */
560 if (code_gen_buffer_size > (800 * 1024 * 1024))
561 code_gen_buffer_size = (800 * 1024 * 1024);
562 #elif defined(__sparc_v9__)
563 // Map the buffer below 2G, so we can use direct calls and branches
564 flags |= MAP_FIXED;
565 start = (void *) 0x60000000UL;
566 if (code_gen_buffer_size > (512 * 1024 * 1024))
567 code_gen_buffer_size = (512 * 1024 * 1024);
568 #elif defined(__arm__)
569 /* Keep the buffer no bigger than 16MB to branch between blocks */
570 if (code_gen_buffer_size > 16 * 1024 * 1024)
571 code_gen_buffer_size = 16 * 1024 * 1024;
572 #elif defined(__s390x__)
573 /* Map the buffer so that we can use direct calls and branches. */
574 /* We have a +- 4GB range on the branches; leave some slop. */
575 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
576 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
578 start = (void *)0x90000000UL;
579 #endif
580 code_gen_buffer = mmap(start, code_gen_buffer_size,
581 PROT_WRITE | PROT_READ | PROT_EXEC,
582 flags, -1, 0);
583 if (code_gen_buffer == MAP_FAILED) {
584 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
585 exit(1);
588 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
589 || defined(__DragonFly__) || defined(__OpenBSD__) \
590 || defined(__NetBSD__)
592 int flags;
593 void *addr = NULL;
594 flags = MAP_PRIVATE | MAP_ANONYMOUS;
595 #if defined(__x86_64__)
596 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
597 * 0x40000000 is free */
598 flags |= MAP_FIXED;
599 addr = (void *)0x40000000;
600 /* Cannot map more than that */
601 if (code_gen_buffer_size > (800 * 1024 * 1024))
602 code_gen_buffer_size = (800 * 1024 * 1024);
603 #elif defined(__sparc_v9__)
604 // Map the buffer below 2G, so we can use direct calls and branches
605 flags |= MAP_FIXED;
606 addr = (void *) 0x60000000UL;
607 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
608 code_gen_buffer_size = (512 * 1024 * 1024);
610 #endif
611 code_gen_buffer = mmap(addr, code_gen_buffer_size,
612 PROT_WRITE | PROT_READ | PROT_EXEC,
613 flags, -1, 0);
614 if (code_gen_buffer == MAP_FAILED) {
615 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
616 exit(1);
619 #else
620 code_gen_buffer = g_malloc(code_gen_buffer_size);
621 map_exec(code_gen_buffer, code_gen_buffer_size);
622 #endif
623 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
624 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
625 code_gen_buffer_max_size = code_gen_buffer_size -
626 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
627 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
628 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
631 /* Must be called before using the QEMU cpus. 'tb_size' is the size
632 (in bytes) allocated to the translation buffer. Zero means default
633 size. */
634 void tcg_exec_init(unsigned long tb_size)
636 cpu_gen_init();
637 code_gen_alloc(tb_size);
638 code_gen_ptr = code_gen_buffer;
639 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
640 page_init();
641 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
642 /* There's no guest base to take into account, so go ahead and
643 initialize the prologue now. */
644 tcg_prologue_init(&tcg_ctx);
645 #endif
648 bool tcg_enabled(void)
650 return code_gen_buffer != NULL;
653 void cpu_exec_init_all(void)
655 #if !defined(CONFIG_USER_ONLY)
656 memory_map_init();
657 io_mem_init();
658 #endif
661 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
663 static int cpu_common_post_load(void *opaque, int version_id)
665 CPUArchState *env = opaque;
667 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
668 version_id is increased. */
669 env->interrupt_request &= ~0x01;
670 tlb_flush(env, 1);
672 return 0;
675 static const VMStateDescription vmstate_cpu_common = {
676 .name = "cpu_common",
677 .version_id = 1,
678 .minimum_version_id = 1,
679 .minimum_version_id_old = 1,
680 .post_load = cpu_common_post_load,
681 .fields = (VMStateField []) {
682 VMSTATE_UINT32(halted, CPUArchState),
683 VMSTATE_UINT32(interrupt_request, CPUArchState),
684 VMSTATE_END_OF_LIST()
687 #endif
689 CPUArchState *qemu_get_cpu(int cpu)
691 CPUArchState *env = first_cpu;
693 while (env) {
694 if (env->cpu_index == cpu)
695 break;
696 env = env->next_cpu;
699 return env;
702 void cpu_exec_init(CPUArchState *env)
704 CPUArchState **penv;
705 int cpu_index;
707 #if defined(CONFIG_USER_ONLY)
708 cpu_list_lock();
709 #endif
710 env->next_cpu = NULL;
711 penv = &first_cpu;
712 cpu_index = 0;
713 while (*penv != NULL) {
714 penv = &(*penv)->next_cpu;
715 cpu_index++;
717 env->cpu_index = cpu_index;
718 env->numa_node = 0;
719 QTAILQ_INIT(&env->breakpoints);
720 QTAILQ_INIT(&env->watchpoints);
721 #ifndef CONFIG_USER_ONLY
722 env->thread_id = qemu_get_thread_id();
723 #endif
724 *penv = env;
725 #if defined(CONFIG_USER_ONLY)
726 cpu_list_unlock();
727 #endif
728 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
729 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
730 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
731 cpu_save, cpu_load, env);
732 #endif
735 /* Allocate a new translation block. Flush the translation buffer if
736 too many translation blocks or too much generated code. */
737 static TranslationBlock *tb_alloc(target_ulong pc)
739 TranslationBlock *tb;
741 if (nb_tbs >= code_gen_max_blocks ||
742 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
743 return NULL;
744 tb = &tbs[nb_tbs++];
745 tb->pc = pc;
746 tb->cflags = 0;
747 return tb;
750 void tb_free(TranslationBlock *tb)
752 /* In practice this is mostly used for single-use temporary TBs.
753 Ignore the hard cases and just back up if this TB happens to
754 be the last one generated. */
755 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
756 code_gen_ptr = tb->tc_ptr;
757 nb_tbs--;
761 static inline void invalidate_page_bitmap(PageDesc *p)
763 if (p->code_bitmap) {
764 g_free(p->code_bitmap);
765 p->code_bitmap = NULL;
767 p->code_write_count = 0;
770 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
772 static void page_flush_tb_1 (int level, void **lp)
774 int i;
776 if (*lp == NULL) {
777 return;
779 if (level == 0) {
780 PageDesc *pd = *lp;
781 for (i = 0; i < L2_SIZE; ++i) {
782 pd[i].first_tb = NULL;
783 invalidate_page_bitmap(pd + i);
785 } else {
786 void **pp = *lp;
787 for (i = 0; i < L2_SIZE; ++i) {
788 page_flush_tb_1 (level - 1, pp + i);
793 static void page_flush_tb(void)
795 int i;
796 for (i = 0; i < V_L1_SIZE; i++) {
797 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
801 /* flush all the translation blocks */
802 /* XXX: tb_flush is currently not thread safe */
803 void tb_flush(CPUArchState *env1)
805 CPUArchState *env;
806 #if defined(DEBUG_FLUSH)
807 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
808 (unsigned long)(code_gen_ptr - code_gen_buffer),
809 nb_tbs, nb_tbs > 0 ?
810 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
811 #endif
812 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
813 cpu_abort(env1, "Internal error: code buffer overflow\n");
815 nb_tbs = 0;
817 for(env = first_cpu; env != NULL; env = env->next_cpu) {
818 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
821 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
822 page_flush_tb();
824 code_gen_ptr = code_gen_buffer;
825 /* XXX: flush processor icache at this point if cache flush is
826 expensive */
827 tb_flush_count++;
830 #ifdef DEBUG_TB_CHECK
832 static void tb_invalidate_check(target_ulong address)
834 TranslationBlock *tb;
835 int i;
836 address &= TARGET_PAGE_MASK;
837 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
838 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
839 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
840 address >= tb->pc + tb->size)) {
841 printf("ERROR invalidate: address=" TARGET_FMT_lx
842 " PC=%08lx size=%04x\n",
843 address, (long)tb->pc, tb->size);
849 /* verify that all the pages have correct rights for code */
850 static void tb_page_check(void)
852 TranslationBlock *tb;
853 int i, flags1, flags2;
855 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
856 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
857 flags1 = page_get_flags(tb->pc);
858 flags2 = page_get_flags(tb->pc + tb->size - 1);
859 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
860 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
861 (long)tb->pc, tb->size, flags1, flags2);
867 #endif
869 /* invalidate one TB */
870 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
871 int next_offset)
873 TranslationBlock *tb1;
874 for(;;) {
875 tb1 = *ptb;
876 if (tb1 == tb) {
877 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
878 break;
880 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
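/* TBs hanging off a PageDesc are linked through tb->page_next[]; the low two
   bits of each list pointer record which of the TB's (at most two) pages the
   link belongs to. */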
884 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
886 TranslationBlock *tb1;
887 unsigned int n1;
889 for(;;) {
890 tb1 = *ptb;
891 n1 = (long)tb1 & 3;
892 tb1 = (TranslationBlock *)((long)tb1 & ~3);
893 if (tb1 == tb) {
894 *ptb = tb1->page_next[n1];
895 break;
897 ptb = &tb1->page_next[n1];
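/* Each TB keeps a circular list (jmp_first/jmp_next[]) of the TBs that have
   been chained to it. The low two bits of every pointer encode which outgoing
   jump slot (0 or 1) of the referring TB is used; the value 2 marks the list
   head, i.e. the TB itself. */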
901 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
903 TranslationBlock *tb1, **ptb;
904 unsigned int n1;
906 ptb = &tb->jmp_next[n];
907 tb1 = *ptb;
908 if (tb1) {
909 /* find tb(n) in circular list */
910 for(;;) {
911 tb1 = *ptb;
912 n1 = (long)tb1 & 3;
913 tb1 = (TranslationBlock *)((long)tb1 & ~3);
914 if (n1 == n && tb1 == tb)
915 break;
916 if (n1 == 2) {
917 ptb = &tb1->jmp_first;
918 } else {
919 ptb = &tb1->jmp_next[n1];
922 /* now we can remove tb(n) from the list */
923 *ptb = tb->jmp_next[n];
925 tb->jmp_next[n] = NULL;
929 /* reset the jump entry 'n' of a TB so that it is not chained to
930 another TB */
931 static inline void tb_reset_jump(TranslationBlock *tb, int n)
933 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
936 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
938 CPUArchState *env;
939 PageDesc *p;
940 unsigned int h, n1;
941 tb_page_addr_t phys_pc;
942 TranslationBlock *tb1, *tb2;
944 /* remove the TB from the hash list */
945 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
946 h = tb_phys_hash_func(phys_pc);
947 tb_remove(&tb_phys_hash[h], tb,
948 offsetof(TranslationBlock, phys_hash_next));
950 /* remove the TB from the page list */
951 if (tb->page_addr[0] != page_addr) {
952 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
953 tb_page_remove(&p->first_tb, tb);
954 invalidate_page_bitmap(p);
956 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
957 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
958 tb_page_remove(&p->first_tb, tb);
959 invalidate_page_bitmap(p);
962 tb_invalidated_flag = 1;
964 /* remove the TB from the hash list */
965 h = tb_jmp_cache_hash_func(tb->pc);
966 for(env = first_cpu; env != NULL; env = env->next_cpu) {
967 if (env->tb_jmp_cache[h] == tb)
968 env->tb_jmp_cache[h] = NULL;
971 /* remove this TB from the two jump lists */
972 tb_jmp_remove(tb, 0);
973 tb_jmp_remove(tb, 1);
975 /* remove any remaining jumps into this TB */
976 tb1 = tb->jmp_first;
977 for(;;) {
978 n1 = (long)tb1 & 3;
979 if (n1 == 2)
980 break;
981 tb1 = (TranslationBlock *)((long)tb1 & ~3);
982 tb2 = tb1->jmp_next[n1];
983 tb_reset_jump(tb1, n1);
984 tb1->jmp_next[n1] = NULL;
985 tb1 = tb2;
987 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
989 tb_phys_invalidate_count++;
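/* Set bits [start, start + len) in the bitmap stored in the byte array 'tab'. */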
992 static inline void set_bits(uint8_t *tab, int start, int len)
994 int end, mask, end1;
996 end = start + len;
997 tab += start >> 3;
998 mask = 0xff << (start & 7);
999 if ((start & ~7) == (end & ~7)) {
1000 if (start < end) {
1001 mask &= ~(0xff << (end & 7));
1002 *tab |= mask;
1004 } else {
1005 *tab++ |= mask;
1006 start = (start + 8) & ~7;
1007 end1 = end & ~7;
1008 while (start < end1) {
1009 *tab++ = 0xff;
1010 start += 8;
1012 if (start < end) {
1013 mask = ~(0xff << (end & 7));
1014 *tab |= mask;
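/* Build a bitmap with one bit per byte of the page that is covered by
   translated code. tb_invalidate_phys_page_fast() consults it so that writes
   which do not touch any translated code can skip the invalidation path. */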
1019 static void build_page_bitmap(PageDesc *p)
1021 int n, tb_start, tb_end;
1022 TranslationBlock *tb;
1024 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1026 tb = p->first_tb;
1027 while (tb != NULL) {
1028 n = (long)tb & 3;
1029 tb = (TranslationBlock *)((long)tb & ~3);
1030 /* NOTE: this is subtle as a TB may span two physical pages */
1031 if (n == 0) {
1032 /* NOTE: tb_end may be after the end of the page, but
1033 it is not a problem */
1034 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1035 tb_end = tb_start + tb->size;
1036 if (tb_end > TARGET_PAGE_SIZE)
1037 tb_end = TARGET_PAGE_SIZE;
1038 } else {
1039 tb_start = 0;
1040 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1042 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1043 tb = tb->page_next[n];
1047 TranslationBlock *tb_gen_code(CPUArchState *env,
1048 target_ulong pc, target_ulong cs_base,
1049 int flags, int cflags)
1051 TranslationBlock *tb;
1052 uint8_t *tc_ptr;
1053 tb_page_addr_t phys_pc, phys_page2;
1054 target_ulong virt_page2;
1055 int code_gen_size;
1057 phys_pc = get_page_addr_code(env, pc);
1058 tb = tb_alloc(pc);
1059 if (!tb) {
1060 /* flush must be done */
1061 tb_flush(env);
1062 /* cannot fail at this point */
1063 tb = tb_alloc(pc);
1064 /* Don't forget to invalidate previous TB info. */
1065 tb_invalidated_flag = 1;
1067 tc_ptr = code_gen_ptr;
1068 tb->tc_ptr = tc_ptr;
1069 tb->cs_base = cs_base;
1070 tb->flags = flags;
1071 tb->cflags = cflags;
1072 cpu_gen_code(env, tb, &code_gen_size);
1073 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1075 /* check next page if needed */
1076 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1077 phys_page2 = -1;
1078 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1079 phys_page2 = get_page_addr_code(env, virt_page2);
1081 tb_link_page(tb, phys_pc, phys_page2);
1082 return tb;
1085 /* invalidate all TBs which intersect with the target physical page
1086 starting in range [start, end). NOTE: start and end must refer to
1087 the same physical page. 'is_cpu_write_access' should be true if called
1088 from a real cpu write access: the virtual CPU will exit the current
1089 TB if code is modified inside this TB. */
1090 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1091 int is_cpu_write_access)
1093 TranslationBlock *tb, *tb_next, *saved_tb;
1094 CPUArchState *env = cpu_single_env;
1095 tb_page_addr_t tb_start, tb_end;
1096 PageDesc *p;
1097 int n;
1098 #ifdef TARGET_HAS_PRECISE_SMC
1099 int current_tb_not_found = is_cpu_write_access;
1100 TranslationBlock *current_tb = NULL;
1101 int current_tb_modified = 0;
1102 target_ulong current_pc = 0;
1103 target_ulong current_cs_base = 0;
1104 int current_flags = 0;
1105 #endif /* TARGET_HAS_PRECISE_SMC */
1107 p = page_find(start >> TARGET_PAGE_BITS);
1108 if (!p)
1109 return;
1110 if (!p->code_bitmap &&
1111 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1112 is_cpu_write_access) {
1113 /* build code bitmap */
1114 build_page_bitmap(p);
1118 /* we remove all the TBs in the range [start, end) */
1118 /* XXX: see if in some cases it could be faster to invalidate all the code */
1119 tb = p->first_tb;
1120 while (tb != NULL) {
1121 n = (long)tb & 3;
1122 tb = (TranslationBlock *)((long)tb & ~3);
1123 tb_next = tb->page_next[n];
1124 /* NOTE: this is subtle as a TB may span two physical pages */
1125 if (n == 0) {
1126 /* NOTE: tb_end may be after the end of the page, but
1127 it is not a problem */
1128 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1129 tb_end = tb_start + tb->size;
1130 } else {
1131 tb_start = tb->page_addr[1];
1132 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1134 if (!(tb_end <= start || tb_start >= end)) {
1135 #ifdef TARGET_HAS_PRECISE_SMC
1136 if (current_tb_not_found) {
1137 current_tb_not_found = 0;
1138 current_tb = NULL;
1139 if (env->mem_io_pc) {
1140 /* now we have a real cpu fault */
1141 current_tb = tb_find_pc(env->mem_io_pc);
1144 if (current_tb == tb &&
1145 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1146 /* If we are modifying the current TB, we must stop
1147 its execution. We could be more precise by checking
1148 that the modification is after the current PC, but it
1149 would require a specialized function to partially
1150 restore the CPU state */
1152 current_tb_modified = 1;
1153 cpu_restore_state(current_tb, env, env->mem_io_pc);
1154 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1155 &current_flags);
1157 #endif /* TARGET_HAS_PRECISE_SMC */
1158 /* we need to do that to handle the case where a signal
1159 occurs while doing tb_phys_invalidate() */
1160 saved_tb = NULL;
1161 if (env) {
1162 saved_tb = env->current_tb;
1163 env->current_tb = NULL;
1165 tb_phys_invalidate(tb, -1);
1166 if (env) {
1167 env->current_tb = saved_tb;
1168 if (env->interrupt_request && env->current_tb)
1169 cpu_interrupt(env, env->interrupt_request);
1172 tb = tb_next;
1174 #if !defined(CONFIG_USER_ONLY)
1175 /* if no code remaining, no need to continue to use slow writes */
1176 if (!p->first_tb) {
1177 invalidate_page_bitmap(p);
1178 if (is_cpu_write_access) {
1179 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1182 #endif
1183 #ifdef TARGET_HAS_PRECISE_SMC
1184 if (current_tb_modified) {
1185 /* we generate a block containing just the instruction
1186 modifying the memory. This ensures that the block cannot modify
1187 itself. */
1188 env->current_tb = NULL;
1189 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1190 cpu_resume_from_signal(env, NULL);
1192 #endif
1195 /* len must be <= 8 and start must be a multiple of len */
1196 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1198 PageDesc *p;
1199 int offset, b;
1200 #if 0
1201 if (1) {
1202 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1203 cpu_single_env->mem_io_vaddr, len,
1204 cpu_single_env->eip,
1205 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1207 #endif
1208 p = page_find(start >> TARGET_PAGE_BITS);
1209 if (!p)
1210 return;
1211 if (p->code_bitmap) {
1212 offset = start & ~TARGET_PAGE_MASK;
1213 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1214 if (b & ((1 << len) - 1))
1215 goto do_invalidate;
1216 } else {
1217 do_invalidate:
1218 tb_invalidate_phys_page_range(start, start + len, 1);
1222 #if !defined(CONFIG_SOFTMMU)
1223 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1224 uintptr_t pc, void *puc)
1226 TranslationBlock *tb;
1227 PageDesc *p;
1228 int n;
1229 #ifdef TARGET_HAS_PRECISE_SMC
1230 TranslationBlock *current_tb = NULL;
1231 CPUArchState *env = cpu_single_env;
1232 int current_tb_modified = 0;
1233 target_ulong current_pc = 0;
1234 target_ulong current_cs_base = 0;
1235 int current_flags = 0;
1236 #endif
1238 addr &= TARGET_PAGE_MASK;
1239 p = page_find(addr >> TARGET_PAGE_BITS);
1240 if (!p)
1241 return;
1242 tb = p->first_tb;
1243 #ifdef TARGET_HAS_PRECISE_SMC
1244 if (tb && pc != 0) {
1245 current_tb = tb_find_pc(pc);
1247 #endif
1248 while (tb != NULL) {
1249 n = (long)tb & 3;
1250 tb = (TranslationBlock *)((long)tb & ~3);
1251 #ifdef TARGET_HAS_PRECISE_SMC
1252 if (current_tb == tb &&
1253 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1254 /* If we are modifying the current TB, we must stop
1255 its execution. We could be more precise by checking
1256 that the modification is after the current PC, but it
1257 would require a specialized function to partially
1258 restore the CPU state */
1260 current_tb_modified = 1;
1261 cpu_restore_state(current_tb, env, pc);
1262 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1263 &current_flags);
1265 #endif /* TARGET_HAS_PRECISE_SMC */
1266 tb_phys_invalidate(tb, addr);
1267 tb = tb->page_next[n];
1269 p->first_tb = NULL;
1270 #ifdef TARGET_HAS_PRECISE_SMC
1271 if (current_tb_modified) {
1272 /* we generate a block containing just the instruction
1273 modifying the memory. This ensures that the block cannot modify
1274 itself. */
1275 env->current_tb = NULL;
1276 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1277 cpu_resume_from_signal(env, puc);
1279 #endif
1281 #endif
1283 /* add the tb in the target page and protect it if necessary */
1284 static inline void tb_alloc_page(TranslationBlock *tb,
1285 unsigned int n, tb_page_addr_t page_addr)
1287 PageDesc *p;
1288 #ifndef CONFIG_USER_ONLY
1289 bool page_already_protected;
1290 #endif
1292 tb->page_addr[n] = page_addr;
1293 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1294 tb->page_next[n] = p->first_tb;
1295 #ifndef CONFIG_USER_ONLY
1296 page_already_protected = p->first_tb != NULL;
1297 #endif
1298 p->first_tb = (TranslationBlock *)((long)tb | n);
1299 invalidate_page_bitmap(p);
1301 #if defined(TARGET_HAS_SMC) || 1
1303 #if defined(CONFIG_USER_ONLY)
1304 if (p->flags & PAGE_WRITE) {
1305 target_ulong addr;
1306 PageDesc *p2;
1307 int prot;
1309 /* force the host page as non writable (writes will have a
1310 page fault + mprotect overhead) */
1311 page_addr &= qemu_host_page_mask;
1312 prot = 0;
1313 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1314 addr += TARGET_PAGE_SIZE) {
1316 p2 = page_find (addr >> TARGET_PAGE_BITS);
1317 if (!p2)
1318 continue;
1319 prot |= p2->flags;
1320 p2->flags &= ~PAGE_WRITE;
1322 mprotect(g2h(page_addr), qemu_host_page_size,
1323 (prot & PAGE_BITS) & ~PAGE_WRITE);
1324 #ifdef DEBUG_TB_INVALIDATE
1325 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1326 page_addr);
1327 #endif
1329 #else
1330 /* if some code is already present, then the pages are already
1331 protected. So we handle the case where only the first TB is
1332 allocated in a physical page */
1333 if (!page_already_protected) {
1334 tlb_protect_code(page_addr);
1336 #endif
1338 #endif /* TARGET_HAS_SMC */
1341 /* add a new TB and link it to the physical page tables. phys_page2 is
1342 (-1) to indicate that only one page contains the TB. */
1343 void tb_link_page(TranslationBlock *tb,
1344 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1346 unsigned int h;
1347 TranslationBlock **ptb;
1349 /* Grab the mmap lock to stop another thread invalidating this TB
1350 before we are done. */
1351 mmap_lock();
1352 /* add in the physical hash table */
1353 h = tb_phys_hash_func(phys_pc);
1354 ptb = &tb_phys_hash[h];
1355 tb->phys_hash_next = *ptb;
1356 *ptb = tb;
1358 /* add in the page list */
1359 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1360 if (phys_page2 != -1)
1361 tb_alloc_page(tb, 1, phys_page2);
1362 else
1363 tb->page_addr[1] = -1;
1365 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1366 tb->jmp_next[0] = NULL;
1367 tb->jmp_next[1] = NULL;
1369 /* init original jump addresses */
1370 if (tb->tb_next_offset[0] != 0xffff)
1371 tb_reset_jump(tb, 0);
1372 if (tb->tb_next_offset[1] != 0xffff)
1373 tb_reset_jump(tb, 1);
1375 #ifdef DEBUG_TB_CHECK
1376 tb_page_check();
1377 #endif
1378 mmap_unlock();
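/* tbs[] is filled in the order the code is generated, so tc_ptr values are
   monotonically increasing; this is what makes the binary search below valid. */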
1381 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1382 tb[1].tc_ptr. Return NULL if not found */
1383 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1385 int m_min, m_max, m;
1386 unsigned long v;
1387 TranslationBlock *tb;
1389 if (nb_tbs <= 0)
1390 return NULL;
1391 if (tc_ptr < (unsigned long)code_gen_buffer ||
1392 tc_ptr >= (unsigned long)code_gen_ptr)
1393 return NULL;
1394 /* binary search (cf Knuth) */
1395 m_min = 0;
1396 m_max = nb_tbs - 1;
1397 while (m_min <= m_max) {
1398 m = (m_min + m_max) >> 1;
1399 tb = &tbs[m];
1400 v = (unsigned long)tb->tc_ptr;
1401 if (v == tc_ptr)
1402 return tb;
1403 else if (tc_ptr < v) {
1404 m_max = m - 1;
1405 } else {
1406 m_min = m + 1;
1409 return &tbs[m_max];
1412 static void tb_reset_jump_recursive(TranslationBlock *tb);
1414 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1416 TranslationBlock *tb1, *tb_next, **ptb;
1417 unsigned int n1;
1419 tb1 = tb->jmp_next[n];
1420 if (tb1 != NULL) {
1421 /* find head of list */
1422 for(;;) {
1423 n1 = (long)tb1 & 3;
1424 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1425 if (n1 == 2)
1426 break;
1427 tb1 = tb1->jmp_next[n1];
1429 /* we are now sure that tb jumps to tb1 */
1430 tb_next = tb1;
1432 /* remove tb from the jmp_first list */
1433 ptb = &tb_next->jmp_first;
1434 for(;;) {
1435 tb1 = *ptb;
1436 n1 = (long)tb1 & 3;
1437 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1438 if (n1 == n && tb1 == tb)
1439 break;
1440 ptb = &tb1->jmp_next[n1];
1442 *ptb = tb->jmp_next[n];
1443 tb->jmp_next[n] = NULL;
1445 /* remove the jump to the next tb from the generated code */
1446 tb_reset_jump(tb, n);
1448 /* also reset the jumps of the tb we were chained to */
1449 tb_reset_jump_recursive(tb_next);
1453 static void tb_reset_jump_recursive(TranslationBlock *tb)
1455 tb_reset_jump_recursive2(tb, 0);
1456 tb_reset_jump_recursive2(tb, 1);
1459 #if defined(TARGET_HAS_ICE)
1460 #if defined(CONFIG_USER_ONLY)
1461 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1463 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1465 #else
1466 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1468 ram_addr_t ram_addr;
1469 MemoryRegionSection *section;
1471 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1472 if (!(memory_region_is_ram(section->mr)
1473 || (section->mr->rom_device && section->mr->readable))) {
1474 return;
1476 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1477 + section_addr(section, addr);
1478 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1481 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1483 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc));
1485 #endif
1486 #endif /* TARGET_HAS_ICE */
1488 #if defined(CONFIG_USER_ONLY)
1489 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1494 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1495 int flags, CPUWatchpoint **watchpoint)
1497 return -ENOSYS;
1499 #else
1500 /* Add a watchpoint. */
1501 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1502 int flags, CPUWatchpoint **watchpoint)
1504 target_ulong len_mask = ~(len - 1);
1505 CPUWatchpoint *wp;
1507 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1508 if ((len & (len - 1)) || (addr & ~len_mask) ||
1509 len == 0 || len > TARGET_PAGE_SIZE) {
1510 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1511 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1512 return -EINVAL;
1514 wp = g_malloc(sizeof(*wp));
1516 wp->vaddr = addr;
1517 wp->len_mask = len_mask;
1518 wp->flags = flags;
1520 /* keep all GDB-injected watchpoints in front */
1521 if (flags & BP_GDB)
1522 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1523 else
1524 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1526 tlb_flush_page(env, addr);
1528 if (watchpoint)
1529 *watchpoint = wp;
1530 return 0;
1533 /* Remove a specific watchpoint. */
1534 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1535 int flags)
1537 target_ulong len_mask = ~(len - 1);
1538 CPUWatchpoint *wp;
1540 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1541 if (addr == wp->vaddr && len_mask == wp->len_mask
1542 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1543 cpu_watchpoint_remove_by_ref(env, wp);
1544 return 0;
1547 return -ENOENT;
1550 /* Remove a specific watchpoint by reference. */
1551 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1553 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1555 tlb_flush_page(env, watchpoint->vaddr);
1557 g_free(watchpoint);
1560 /* Remove all matching watchpoints. */
1561 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1563 CPUWatchpoint *wp, *next;
1565 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1566 if (wp->flags & mask)
1567 cpu_watchpoint_remove_by_ref(env, wp);
1570 #endif
1572 /* Add a breakpoint. */
1573 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1574 CPUBreakpoint **breakpoint)
1576 #if defined(TARGET_HAS_ICE)
1577 CPUBreakpoint *bp;
1579 bp = g_malloc(sizeof(*bp));
1581 bp->pc = pc;
1582 bp->flags = flags;
1584 /* keep all GDB-injected breakpoints in front */
1585 if (flags & BP_GDB)
1586 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1587 else
1588 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1590 breakpoint_invalidate(env, pc);
1592 if (breakpoint)
1593 *breakpoint = bp;
1594 return 0;
1595 #else
1596 return -ENOSYS;
1597 #endif
1600 /* Remove a specific breakpoint. */
1601 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1603 #if defined(TARGET_HAS_ICE)
1604 CPUBreakpoint *bp;
1606 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1607 if (bp->pc == pc && bp->flags == flags) {
1608 cpu_breakpoint_remove_by_ref(env, bp);
1609 return 0;
1612 return -ENOENT;
1613 #else
1614 return -ENOSYS;
1615 #endif
1618 /* Remove a specific breakpoint by reference. */
1619 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1621 #if defined(TARGET_HAS_ICE)
1622 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1624 breakpoint_invalidate(env, breakpoint->pc);
1626 g_free(breakpoint);
1627 #endif
1630 /* Remove all matching breakpoints. */
1631 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1633 #if defined(TARGET_HAS_ICE)
1634 CPUBreakpoint *bp, *next;
1636 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1637 if (bp->flags & mask)
1638 cpu_breakpoint_remove_by_ref(env, bp);
1640 #endif
1643 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1644 CPU loop after each instruction */
1645 void cpu_single_step(CPUArchState *env, int enabled)
1647 #if defined(TARGET_HAS_ICE)
1648 if (env->singlestep_enabled != enabled) {
1649 env->singlestep_enabled = enabled;
1650 if (kvm_enabled())
1651 kvm_update_guest_debug(env, 0);
1652 else {
1653 /* must flush all the translated code to avoid inconsistencies */
1654 /* XXX: only flush what is necessary */
1655 tb_flush(env);
1658 #endif
1661 /* enable or disable low-level logging */
1662 void cpu_set_log(int log_flags)
1664 loglevel = log_flags;
1665 if (loglevel && !logfile) {
1666 logfile = fopen(logfilename, log_append ? "a" : "w");
1667 if (!logfile) {
1668 perror(logfilename);
1669 _exit(1);
1671 #if !defined(CONFIG_SOFTMMU)
1672 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1674 static char logfile_buf[4096];
1675 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1677 #elif defined(_WIN32)
1678 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1679 setvbuf(logfile, NULL, _IONBF, 0);
1680 #else
1681 setvbuf(logfile, NULL, _IOLBF, 0);
1682 #endif
1683 log_append = 1;
1685 if (!loglevel && logfile) {
1686 fclose(logfile);
1687 logfile = NULL;
1691 void cpu_set_log_filename(const char *filename)
1693 logfilename = strdup(filename);
1694 if (logfile) {
1695 fclose(logfile);
1696 logfile = NULL;
1698 cpu_set_log(loglevel);
1701 static void cpu_unlink_tb(CPUArchState *env)
1703 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1704 problem and hope the cpu will stop of its own accord. For userspace
1705 emulation this often isn't actually as bad as it sounds. Often
1706 signals are used primarily to interrupt blocking syscalls. */
1707 TranslationBlock *tb;
1708 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1710 spin_lock(&interrupt_lock);
1711 tb = env->current_tb;
1712 /* if the cpu is currently executing code, we must unlink it and
1713 all the potentially executing TB */
1714 if (tb) {
1715 env->current_tb = NULL;
1716 tb_reset_jump_recursive(tb);
1718 spin_unlock(&interrupt_lock);
1721 #ifndef CONFIG_USER_ONLY
1722 /* mask must never be zero, except for A20 change call */
1723 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1725 int old_mask;
1727 old_mask = env->interrupt_request;
1728 env->interrupt_request |= mask;
1731 * If called from iothread context, wake the target cpu in
1732 * case it's halted.
1734 if (!qemu_cpu_is_self(env)) {
1735 qemu_cpu_kick(env);
1736 return;
1739 if (use_icount) {
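/* Force the current TB to exit at its next icount check: making the
   high half of icount_decr non-zero turns the 32-bit counter negative. */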
1740 env->icount_decr.u16.high = 0xffff;
1741 if (!can_do_io(env)
1742 && (mask & ~old_mask) != 0) {
1743 cpu_abort(env, "Raised interrupt while not in I/O function");
1745 } else {
1746 cpu_unlink_tb(env);
1750 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1752 #else /* CONFIG_USER_ONLY */
1754 void cpu_interrupt(CPUArchState *env, int mask)
1756 env->interrupt_request |= mask;
1757 cpu_unlink_tb(env);
1759 #endif /* CONFIG_USER_ONLY */
1761 void cpu_reset_interrupt(CPUArchState *env, int mask)
1763 env->interrupt_request &= ~mask;
1766 void cpu_exit(CPUArchState *env)
1768 env->exit_request = 1;
1769 cpu_unlink_tb(env);
1772 const CPULogItem cpu_log_items[] = {
1773 { CPU_LOG_TB_OUT_ASM, "out_asm",
1774 "show generated host assembly code for each compiled TB" },
1775 { CPU_LOG_TB_IN_ASM, "in_asm",
1776 "show target assembly code for each compiled TB" },
1777 { CPU_LOG_TB_OP, "op",
1778 "show micro ops for each compiled TB" },
1779 { CPU_LOG_TB_OP_OPT, "op_opt",
1780 "show micro ops "
1781 #ifdef TARGET_I386
1782 "before eflags optimization and "
1783 #endif
1784 "after liveness analysis" },
1785 { CPU_LOG_INT, "int",
1786 "show interrupts/exceptions in short format" },
1787 { CPU_LOG_EXEC, "exec",
1788 "show trace before each executed TB (lots of logs)" },
1789 { CPU_LOG_TB_CPU, "cpu",
1790 "show CPU state before block translation" },
1791 #ifdef TARGET_I386
1792 { CPU_LOG_PCALL, "pcall",
1793 "show protected mode far calls/returns/exceptions" },
1794 { CPU_LOG_RESET, "cpu_reset",
1795 "show CPU state before CPU resets" },
1796 #endif
1797 #ifdef DEBUG_IOPORT
1798 { CPU_LOG_IOPORT, "ioport",
1799 "show all i/o ports accesses" },
1800 #endif
1801 { 0, NULL, NULL },
1804 static int cmp1(const char *s1, int n, const char *s2)
1806 if (strlen(s2) != n)
1807 return 0;
1808 return memcmp(s1, s2, n) == 0;
1811 /* takes a comma-separated list of log masks. Returns 0 on error. */
1812 int cpu_str_to_log_mask(const char *str)
1814 const CPULogItem *item;
1815 int mask;
1816 const char *p, *p1;
1818 p = str;
1819 mask = 0;
1820 for(;;) {
1821 p1 = strchr(p, ',');
1822 if (!p1)
1823 p1 = p + strlen(p);
1824 if(cmp1(p,p1-p,"all")) {
1825 for(item = cpu_log_items; item->mask != 0; item++) {
1826 mask |= item->mask;
1828 } else {
1829 for(item = cpu_log_items; item->mask != 0; item++) {
1830 if (cmp1(p, p1 - p, item->name))
1831 goto found;
1833 return 0;
1835 found:
1836 mask |= item->mask;
1837 if (*p1 != ',')
1838 break;
1839 p = p1 + 1;
1841 return mask;
1844 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1846 va_list ap;
1847 va_list ap2;
1849 va_start(ap, fmt);
1850 va_copy(ap2, ap);
1851 fprintf(stderr, "qemu: fatal: ");
1852 vfprintf(stderr, fmt, ap);
1853 fprintf(stderr, "\n");
1854 #ifdef TARGET_I386
1855 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1856 #else
1857 cpu_dump_state(env, stderr, fprintf, 0);
1858 #endif
1859 if (qemu_log_enabled()) {
1860 qemu_log("qemu: fatal: ");
1861 qemu_log_vprintf(fmt, ap2);
1862 qemu_log("\n");
1863 #ifdef TARGET_I386
1864 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1865 #else
1866 log_cpu_state(env, 0);
1867 #endif
1868 qemu_log_flush();
1869 qemu_log_close();
1871 va_end(ap2);
1872 va_end(ap);
1873 #if defined(CONFIG_USER_ONLY)
1875 struct sigaction act;
1876 sigfillset(&act.sa_mask);
1877 act.sa_handler = SIG_DFL;
1878 sigaction(SIGABRT, &act, NULL);
1880 #endif
1881 abort();
1884 CPUArchState *cpu_copy(CPUArchState *env)
1886 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1887 CPUArchState *next_cpu = new_env->next_cpu;
1888 int cpu_index = new_env->cpu_index;
1889 #if defined(TARGET_HAS_ICE)
1890 CPUBreakpoint *bp;
1891 CPUWatchpoint *wp;
1892 #endif
1894 memcpy(new_env, env, sizeof(CPUArchState));
1896 /* Preserve chaining and index. */
1897 new_env->next_cpu = next_cpu;
1898 new_env->cpu_index = cpu_index;
1900 /* Clone all break/watchpoints.
1901 Note: Once we support ptrace with hw-debug register access, make sure
1902 BP_CPU break/watchpoints are handled correctly on clone. */
1903 QTAILQ_INIT(&env->breakpoints);
1904 QTAILQ_INIT(&env->watchpoints);
1905 #if defined(TARGET_HAS_ICE)
1906 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1907 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1909 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1910 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1911 wp->flags, NULL);
1913 #endif
1915 return new_env;
1918 #if !defined(CONFIG_USER_ONLY)
1920 static inline void tlb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1922 unsigned int i;
1924 /* Discard jump cache entries for any tb which might potentially
1925 overlap the flushed page. */
1926 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1927 memset (&env->tb_jmp_cache[i], 0,
1928 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1930 i = tb_jmp_cache_hash_page(addr);
1931 memset (&env->tb_jmp_cache[i], 0,
1932 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1935 static CPUTLBEntry s_cputlb_empty_entry = {
1936 .addr_read = -1,
1937 .addr_write = -1,
1938 .addr_code = -1,
1939 .addend = -1,
1942 /* NOTE:
1943 * If flush_global is true (the usual case), flush all tlb entries.
1944 * If flush_global is false, flush (at least) all tlb entries not
1945 * marked global.
1947 * Since QEMU doesn't currently implement a global/not-global flag
1948 * for tlb entries, at the moment tlb_flush() will also flush all
1949 * tlb entries in the flush_global == false case. This is OK because
1950 * CPU architectures generally permit an implementation to drop
1951 * entries from the TLB at any time, so flushing more entries than
1952 * required is only an efficiency issue, not a correctness issue.
1954 void tlb_flush(CPUArchState *env, int flush_global)
1956 int i;
1958 #if defined(DEBUG_TLB)
1959 printf("tlb_flush:\n");
1960 #endif
1961 /* must reset current TB so that interrupts cannot modify the
1962 links while we are modifying them */
1963 env->current_tb = NULL;
1965 for(i = 0; i < CPU_TLB_SIZE; i++) {
1966 int mmu_idx;
1967 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1968 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1972 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1974 env->tlb_flush_addr = -1;
1975 env->tlb_flush_mask = 0;
1976 tlb_flush_count++;
1979 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1981 if (addr == (tlb_entry->addr_read &
1982 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1983 addr == (tlb_entry->addr_write &
1984 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1985 addr == (tlb_entry->addr_code &
1986 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1987 *tlb_entry = s_cputlb_empty_entry;
1991 void tlb_flush_page(CPUArchState *env, target_ulong addr)
1993 int i;
1994 int mmu_idx;
1996 #if defined(DEBUG_TLB)
1997 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1998 #endif
1999 /* Check if we need to flush due to large pages. */
2000 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2001 #if defined(DEBUG_TLB)
2002 printf("tlb_flush_page: forced full flush ("
2003 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2004 env->tlb_flush_addr, env->tlb_flush_mask);
2005 #endif
2006 tlb_flush(env, 1);
2007 return;
2009 /* must reset current TB so that interrupts cannot modify the
2010 links while we are modifying them */
2011 env->current_tb = NULL;
2013 addr &= TARGET_PAGE_MASK;
2014 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2015 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2016 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2018 tlb_flush_jmp_cache(env, addr);
2021 /* update the TLBs so that writes to code in the virtual page 'addr'
2022 can be detected */
2023 static void tlb_protect_code(ram_addr_t ram_addr)
2025 cpu_physical_memory_reset_dirty(ram_addr,
2026 ram_addr + TARGET_PAGE_SIZE,
2027 CODE_DIRTY_FLAG);
2030 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2031 tested for self modifying code */
2032 static void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
2033 target_ulong vaddr)
2035 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2038 static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
2040 return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
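/* Mark clean RAM pages with TLB_NOTDIRTY in their TLB write entries so that
   the next write takes the slow path, which re-dirties the page and clears
   the flag again (see tlb_set_dirty()). */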
2043 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2044 unsigned long start, unsigned long length)
2046 unsigned long addr;
2047 if (tlb_is_dirty_ram(tlb_entry)) {
2048 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2049 if ((addr - start) < length) {
2050 tlb_entry->addr_write |= TLB_NOTDIRTY;
2055 /* Note: start and end must be within the same ram block. */
2056 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2057 int dirty_flags)
2059 CPUArchState *env;
2060 unsigned long length, start1;
2061 int i;
2063 start &= TARGET_PAGE_MASK;
2064 end = TARGET_PAGE_ALIGN(end);
2066 length = end - start;
2067 if (length == 0)
2068 return;
2069 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2071 /* we modify the TLB cache so that the dirty bit will be set again
2072 when accessing the range */
2073 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2074 /* Check that we don't span multiple blocks - this breaks the
2075 address comparisons below. */
2076 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2077 != (end - 1) - start) {
2078 abort();
2081 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2082 int mmu_idx;
2083 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2084 for(i = 0; i < CPU_TLB_SIZE; i++)
2085 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2086 start1, length);
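/* With dirty logging driven by the memory listeners below, enabling or
   disabling dirty tracking only needs to record whether migration is in
   progress. */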
2091 int cpu_physical_memory_set_dirty_tracking(int enable)
2093 int ret = 0;
2094 in_migration = enable;
2095 return ret;
2098 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2100 ram_addr_t ram_addr;
2101 void *p;
2103 if (tlb_is_dirty_ram(tlb_entry)) {
2104 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2105 + tlb_entry->addend);
2106 ram_addr = qemu_ram_addr_from_host_nofail(p);
2107 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2108 tlb_entry->addr_write |= TLB_NOTDIRTY;
2113 /* update the TLB according to the current state of the dirty bits */
2114 void cpu_tlb_update_dirty(CPUArchState *env)
2116 int i;
2117 int mmu_idx;
2118 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2119 for(i = 0; i < CPU_TLB_SIZE; i++)
2120 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2124 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2126 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2127 tlb_entry->addr_write = vaddr;
2130 /* update the TLB corresponding to virtual page vaddr
2131 so that it is no longer dirty */
2132 static inline void tlb_set_dirty(CPUArchState *env, target_ulong vaddr)
2134 int i;
2135 int mmu_idx;
2137 vaddr &= TARGET_PAGE_MASK;
2138 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2139 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2140 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2143 /* Our TLB does not support large pages, so remember the area covered by
2144 large pages and trigger a full TLB flush if these are invalidated. */
2145 static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
2146 target_ulong size)
2148 target_ulong mask = ~(size - 1);
2150 if (env->tlb_flush_addr == (target_ulong)-1) {
2151 env->tlb_flush_addr = vaddr & mask;
2152 env->tlb_flush_mask = mask;
2153 return;
2155 /* Extend the existing region to include the new page.
2156 This is a compromise between unnecessary flushes and the cost
2157 of maintaining a full variable size TLB. */
2158 mask &= env->tlb_flush_mask;
2159 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2160 mask <<= 1;
2162 env->tlb_flush_addr &= mask;
2163 env->tlb_flush_mask = mask;
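/* For instance (illustrative, assuming 32-bit target addresses): with an
   existing 2MB large page tracked at 0x00200000 (mask 0xffe00000) and a
   new 2MB page at 0x00600000, the loop widens the mask to 0xff800000,
   i.e. a single 8MB region starting at 0 now triggers the full-flush
   check in tlb_flush_page. */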
2166 static bool is_ram_rom(MemoryRegionSection *s)
2168 return memory_region_is_ram(s->mr);
2171 static bool is_romd(MemoryRegionSection *s)
2173 MemoryRegion *mr = s->mr;
2175 return mr->rom_device && mr->readable;
2178 static bool is_ram_rom_romd(MemoryRegionSection *s)
2180 return is_ram_rom(s) || is_romd(s);
2183 /* Add a new TLB entry. At most one entry for a given virtual address
2184 is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2185 supplied size is used only by tlb_flush_page. */
2186 void tlb_set_page(CPUArchState *env, target_ulong vaddr,
2187 target_phys_addr_t paddr, int prot,
2188 int mmu_idx, target_ulong size)
2190 MemoryRegionSection *section;
2191 unsigned int index;
2192 target_ulong address;
2193 target_ulong code_address;
2194 unsigned long addend;
2195 CPUTLBEntry *te;
2196 CPUWatchpoint *wp;
2197 target_phys_addr_t iotlb;
2199 assert(size >= TARGET_PAGE_SIZE);
2200 if (size != TARGET_PAGE_SIZE) {
2201 tlb_add_large_page(env, vaddr, size);
2203 section = phys_page_find(paddr >> TARGET_PAGE_BITS);
2204 #if defined(DEBUG_TLB)
2205 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2206 " prot=%x idx=%d pd=0x%08lx\n",
2207 vaddr, paddr, prot, mmu_idx, pd);
2208 #endif
2210 address = vaddr;
2211 if (!is_ram_rom_romd(section)) {
2212 /* IO memory case (romd handled later) */
2213 address |= TLB_MMIO;
2215 if (is_ram_rom_romd(section)) {
2216 addend = (unsigned long)memory_region_get_ram_ptr(section->mr)
2217 + section_addr(section, paddr);
2218 } else {
2219 addend = 0;
2221 if (is_ram_rom(section)) {
2222 /* Normal RAM. */
2223 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2224 + section_addr(section, paddr);
2225 if (!section->readonly)
2226 iotlb |= phys_section_notdirty;
2227 else
2228 iotlb |= phys_section_rom;
2229 } else {
2230 /* IO handlers are currently passed a physical address.
2231 It would be nice to pass an offset from the base address
2232 of that region. This would avoid having to special case RAM,
2233 and avoid full address decoding in every device.
2234 We can't use the high bits of pd for this because
2235 IO_MEM_ROMD uses these as a ram address. */
2236 iotlb = section - phys_sections;
2237 iotlb += section_addr(section, paddr);
2240 code_address = address;
2241 /* Make accesses to pages with watchpoints go via the
2242 watchpoint trap routines. */
2243 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2244 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2245 /* Avoid trapping reads of pages with a write breakpoint. */
2246 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2247 iotlb = phys_section_watch + paddr;
2248 address |= TLB_MMIO;
2249 break;
2254 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2255 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2256 te = &env->tlb_table[mmu_idx][index];
2257 te->addend = addend - vaddr;
2258 if (prot & PAGE_READ) {
2259 te->addr_read = address;
2260 } else {
2261 te->addr_read = -1;
2264 if (prot & PAGE_EXEC) {
2265 te->addr_code = code_address;
2266 } else {
2267 te->addr_code = -1;
2269 if (prot & PAGE_WRITE) {
2270 if ((memory_region_is_ram(section->mr) && section->readonly)
2271 || is_romd(section)) {
2272 /* Write access calls the I/O callback. */
2273 te->addr_write = address | TLB_MMIO;
2274 } else if (memory_region_is_ram(section->mr)
2275 && !cpu_physical_memory_is_dirty(
2276 section->mr->ram_addr
2277 + section_addr(section, paddr))) {
2278 te->addr_write = address | TLB_NOTDIRTY;
2279 } else {
2280 te->addr_write = address;
2282 } else {
2283 te->addr_write = -1;
2287 #else
2289 void tlb_flush(CPUArchState *env, int flush_global)
2293 void tlb_flush_page(CPUArchState *env, target_ulong addr)
2298 * Walks guest process memory "regions" one by one
2299 * and calls callback function 'fn' for each region.
2302 struct walk_memory_regions_data
2304 walk_memory_regions_fn fn;
2305 void *priv;
2306 unsigned long start;
2307 int prot;
2310 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2311 abi_ulong end, int new_prot)
2313 if (data->start != -1ul) {
2314 int rc = data->fn(data->priv, data->start, end, data->prot);
2315 if (rc != 0) {
2316 return rc;
2320 data->start = (new_prot ? end : -1ul);
2321 data->prot = new_prot;
2323 return 0;
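/* Recursively walk one level of the page-descriptor radix tree, reporting
   each contiguous run of pages that shares the same protection flags. */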
2326 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2327 abi_ulong base, int level, void **lp)
2329 abi_ulong pa;
2330 int i, rc;
2332 if (*lp == NULL) {
2333 return walk_memory_regions_end(data, base, 0);
2336 if (level == 0) {
2337 PageDesc *pd = *lp;
2338 for (i = 0; i < L2_SIZE; ++i) {
2339 int prot = pd[i].flags;
2341 pa = base | (i << TARGET_PAGE_BITS);
2342 if (prot != data->prot) {
2343 rc = walk_memory_regions_end(data, pa, prot);
2344 if (rc != 0) {
2345 return rc;
2349 } else {
2350 void **pp = *lp;
2351 for (i = 0; i < L2_SIZE; ++i) {
2352 pa = base | ((abi_ulong)i <<
2353 (TARGET_PAGE_BITS + L2_BITS * level));
2354 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2355 if (rc != 0) {
2356 return rc;
2361 return 0;
2364 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2366 struct walk_memory_regions_data data;
2367 unsigned long i;
2369 data.fn = fn;
2370 data.priv = priv;
2371 data.start = -1ul;
2372 data.prot = 0;
2374 for (i = 0; i < V_L1_SIZE; i++) {
2375 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2376 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2377 if (rc != 0) {
2378 return rc;
2382 return walk_memory_regions_end(&data, 0, 0);
2385 static int dump_region(void *priv, abi_ulong start,
2386 abi_ulong end, unsigned long prot)
2388 FILE *f = (FILE *)priv;
2390 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2391 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2392 start, end, end - start,
2393 ((prot & PAGE_READ) ? 'r' : '-'),
2394 ((prot & PAGE_WRITE) ? 'w' : '-'),
2395 ((prot & PAGE_EXEC) ? 'x' : '-'));
2397 return (0);
2400 /* dump memory mappings */
2401 void page_dump(FILE *f)
2403 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2404 "start", "end", "size", "prot");
2405 walk_memory_regions(f, dump_region);
2408 int page_get_flags(target_ulong address)
2410 PageDesc *p;
2412 p = page_find(address >> TARGET_PAGE_BITS);
2413 if (!p)
2414 return 0;
2415 return p->flags;
2418 /* Modify the flags of a page and invalidate the code if necessary.
2419 The flag PAGE_WRITE_ORG is set automatically based on PAGE_WRITE.
2420 The mmap_lock should already be held. */
2421 void page_set_flags(target_ulong start, target_ulong end, int flags)
2423 target_ulong addr, len;
2425 /* This function should never be called with addresses outside the
2426 guest address space. If this assert fires, it probably indicates
2427 a missing call to h2g_valid. */
2428 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2429 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2430 #endif
2431 assert(start < end);
2433 start = start & TARGET_PAGE_MASK;
2434 end = TARGET_PAGE_ALIGN(end);
2436 if (flags & PAGE_WRITE) {
2437 flags |= PAGE_WRITE_ORG;
2440 for (addr = start, len = end - start;
2441 len != 0;
2442 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2443 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2445 /* If the write protection bit is set, then we invalidate
2446 the code inside. */
2447 if (!(p->flags & PAGE_WRITE) &&
2448 (flags & PAGE_WRITE) &&
2449 p->first_tb) {
2450 tb_invalidate_phys_page(addr, 0, NULL);
2452 p->flags = flags;
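/* Check that [start, start+len) is mapped with at least the requested
   protection. Returns 0 on success, -1 otherwise; pages made read-only
   only to protect translated code are unprotected on demand when
   PAGE_WRITE is requested. */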
2456 int page_check_range(target_ulong start, target_ulong len, int flags)
2458 PageDesc *p;
2459 target_ulong end;
2460 target_ulong addr;
2462 /* This function should never be called with addresses outside the
2463 guest address space. If this assert fires, it probably indicates
2464 a missing call to h2g_valid. */
2465 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2466 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2467 #endif
2469 if (len == 0) {
2470 return 0;
2472 if (start + len - 1 < start) {
2473 /* We've wrapped around. */
2474 return -1;
2477 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2478 start = start & TARGET_PAGE_MASK;
2480 for (addr = start, len = end - start;
2481 len != 0;
2482 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2483 p = page_find(addr >> TARGET_PAGE_BITS);
2484 if (!p)
2485 return -1;
2486 if (!(p->flags & PAGE_VALID))
2487 return -1;
2489 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2490 return -1;
2491 if (flags & PAGE_WRITE) {
2492 if (!(p->flags & PAGE_WRITE_ORG))
2493 return -1;
2494 /* unprotect the page if it was put read-only because it
2495 contains translated code */
2496 if (!(p->flags & PAGE_WRITE)) {
2497 if (!page_unprotect(addr, 0, NULL))
2498 return -1;
2500 return 0;
2503 return 0;
2506 /* called from signal handler: invalidate the code and unprotect the
2507 page. Return TRUE if the fault was successfully handled. */
2508 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2510 unsigned int prot;
2511 PageDesc *p;
2512 target_ulong host_start, host_end, addr;
2514 /* Technically this isn't safe inside a signal handler. However we
2515 know this only ever happens in a synchronous SEGV handler, so in
2516 practice it seems to be ok. */
2517 mmap_lock();
2519 p = page_find(address >> TARGET_PAGE_BITS);
2520 if (!p) {
2521 mmap_unlock();
2522 return 0;
2525 /* if the page was really writable, then we change its
2526 protection back to writable */
2527 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2528 host_start = address & qemu_host_page_mask;
2529 host_end = host_start + qemu_host_page_size;
2531 prot = 0;
2532 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2533 p = page_find(addr >> TARGET_PAGE_BITS);
2534 p->flags |= PAGE_WRITE;
2535 prot |= p->flags;
2537 /* and since the content will be modified, we must invalidate
2538 the corresponding translated code. */
2539 tb_invalidate_phys_page(addr, pc, puc);
2540 #ifdef DEBUG_TB_CHECK
2541 tb_invalidate_check(addr);
2542 #endif
2544 mprotect((void *)g2h(host_start), qemu_host_page_size,
2545 prot & PAGE_BITS);
2547 mmap_unlock();
2548 return 1;
2550 mmap_unlock();
2551 return 0;
2554 static inline void tlb_set_dirty(CPUArchState *env,
2555 unsigned long addr, target_ulong vaddr)
2558 #endif /* defined(CONFIG_USER_ONLY) */
2560 #if !defined(CONFIG_USER_ONLY)
2562 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2563 typedef struct subpage_t {
2564 MemoryRegion iomem;
2565 target_phys_addr_t base;
2566 uint16_t sub_section[TARGET_PAGE_SIZE];
2567 } subpage_t;
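/* A subpage_t covers one target page whose contents are split between
   several memory regions; sub_section[] maps each byte offset within the
   page to an index into phys_sections. */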
2569 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2570 uint16_t section);
2571 static subpage_t *subpage_init(target_phys_addr_t base);
2572 static void destroy_page_desc(uint16_t section_index)
2574 MemoryRegionSection *section = &phys_sections[section_index];
2575 MemoryRegion *mr = section->mr;
2577 if (mr->subpage) {
2578 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2579 memory_region_destroy(&subpage->iomem);
2580 g_free(subpage);
2584 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2586 unsigned i;
2587 PhysPageEntry *p;
2589 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2590 return;
2593 p = phys_map_nodes[lp->ptr];
2594 for (i = 0; i < L2_SIZE; ++i) {
2595 if (!p[i].is_leaf) {
2596 destroy_l2_mapping(&p[i], level - 1);
2597 } else {
2598 destroy_page_desc(p[i].ptr);
2601 lp->is_leaf = 0;
2602 lp->ptr = PHYS_MAP_NODE_NIL;
2605 static void destroy_all_mappings(void)
2607 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2608 phys_map_nodes_reset();
2611 static uint16_t phys_section_add(MemoryRegionSection *section)
2613 if (phys_sections_nb == phys_sections_nb_alloc) {
2614 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2615 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2616 phys_sections_nb_alloc);
2618 phys_sections[phys_sections_nb] = *section;
2619 return phys_sections_nb++;
2622 static void phys_sections_clear(void)
2624 phys_sections_nb = 0;
2627 /* Register a MemoryRegionSection in the physical page map.
2628 For RAM, 'size' must be a multiple of the target page size.
2629 Sections that do not start or end on a target page boundary are
2630 routed through a subpage container so that several memory regions
2631 can share a single page; page-aligned portions are entered into
2632 the page map directly (see cpu_register_physical_memory_log). */
2635 static void register_subpage(MemoryRegionSection *section)
2637 subpage_t *subpage;
2638 target_phys_addr_t base = section->offset_within_address_space
2639 & TARGET_PAGE_MASK;
2640 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2641 MemoryRegionSection subsection = {
2642 .offset_within_address_space = base,
2643 .size = TARGET_PAGE_SIZE,
2645 target_phys_addr_t start, end;
2647 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2649 if (!(existing->mr->subpage)) {
2650 subpage = subpage_init(base);
2651 subsection.mr = &subpage->iomem;
2652 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2653 phys_section_add(&subsection));
2654 } else {
2655 subpage = container_of(existing->mr, subpage_t, iomem);
2657 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2658 end = start + section->size;
2659 subpage_register(subpage, start, end, phys_section_add(section));
2663 static void register_multipage(MemoryRegionSection *section)
2665 target_phys_addr_t start_addr = section->offset_within_address_space;
2666 ram_addr_t size = section->size;
2667 target_phys_addr_t addr;
2668 uint16_t section_index = phys_section_add(section);
2670 assert(size);
2672 addr = start_addr;
2673 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2674 section_index);
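/* Split a section into an (optional) unaligned head, a page-aligned
   middle registered as full pages, and an (optional) unaligned tail;
   the unaligned pieces go through the subpage machinery above. */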
2677 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2678 bool readonly)
2680 MemoryRegionSection now = *section, remain = *section;
2682 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2683 || (now.size < TARGET_PAGE_SIZE)) {
2684 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2685 - now.offset_within_address_space,
2686 now.size);
2687 register_subpage(&now);
2688 remain.size -= now.size;
2689 remain.offset_within_address_space += now.size;
2690 remain.offset_within_region += now.size;
2692 now = remain;
2693 now.size &= TARGET_PAGE_MASK;
2694 if (now.size) {
2695 register_multipage(&now);
2696 remain.size -= now.size;
2697 remain.offset_within_address_space += now.size;
2698 remain.offset_within_region += now.size;
2700 now = remain;
2701 if (now.size) {
2702 register_subpage(&now);
2707 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2709 if (kvm_enabled())
2710 kvm_coalesce_mmio_region(addr, size);
2713 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2715 if (kvm_enabled())
2716 kvm_uncoalesce_mmio_region(addr, size);
2719 void qemu_flush_coalesced_mmio_buffer(void)
2721 if (kvm_enabled())
2722 kvm_flush_coalesced_mmio_buffer();
2725 #if defined(__linux__) && !defined(TARGET_S390X)
2727 #include <sys/vfs.h>
2729 #define HUGETLBFS_MAGIC 0x958458f6
2731 static long gethugepagesize(const char *path)
2733 struct statfs fs;
2734 int ret;
2736 do {
2737 ret = statfs(path, &fs);
2738 } while (ret != 0 && errno == EINTR);
2740 if (ret != 0) {
2741 perror(path);
2742 return 0;
2745 if (fs.f_type != HUGETLBFS_MAGIC)
2746 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2748 return fs.f_bsize;
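/* Allocate guest RAM backed by a file (normally on hugetlbfs) under
   'path'. Any failure returns NULL so the caller can fall back to an
   ordinary anonymous allocation. */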
2751 static void *file_ram_alloc(RAMBlock *block,
2752 ram_addr_t memory,
2753 const char *path)
2755 char *filename;
2756 void *area;
2757 int fd;
2758 #ifdef MAP_POPULATE
2759 int flags;
2760 #endif
2761 unsigned long hpagesize;
2763 hpagesize = gethugepagesize(path);
2764 if (!hpagesize) {
2765 return NULL;
2768 if (memory < hpagesize) {
2769 return NULL;
2772 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2773 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2774 return NULL;
2777 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2778 return NULL;
2781 fd = mkstemp(filename);
2782 if (fd < 0) {
2783 perror("unable to create backing store for hugepages");
2784 free(filename);
2785 return NULL;
2787 unlink(filename);
2788 free(filename);
2790 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2793 * ftruncate is not supported by hugetlbfs in older
2794 * hosts, so don't bother bailing out on errors.
2795 * If anything goes wrong with it under other filesystems,
2796 * mmap will fail.
2798 if (ftruncate(fd, memory))
2799 perror("ftruncate");
2801 #ifdef MAP_POPULATE
2802 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages
2803 * when MAP_PRIVATE is requested. For mem_prealloc we mmap as
2804 * MAP_SHARED to sidestep this quirk.
2806 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2807 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2808 #else
2809 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2810 #endif
2811 if (area == MAP_FAILED) {
2812 perror("file_ram_alloc: can't mmap RAM pages");
2813 close(fd);
2814 return (NULL);
2816 block->fd = fd;
2817 return area;
2819 #endif
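/* Best-fit search over the registered RAM blocks for the smallest gap in
   the ram_addr_t space that can hold a new block of the given size. */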
2821 static ram_addr_t find_ram_offset(ram_addr_t size)
2823 RAMBlock *block, *next_block;
2824 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2826 if (QLIST_EMPTY(&ram_list.blocks))
2827 return 0;
2829 QLIST_FOREACH(block, &ram_list.blocks, next) {
2830 ram_addr_t end, next = RAM_ADDR_MAX;
2832 end = block->offset + block->length;
2834 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2835 if (next_block->offset >= end) {
2836 next = MIN(next, next_block->offset);
2839 if (next - end >= size && next - end < mingap) {
2840 offset = end;
2841 mingap = next - end;
2845 if (offset == RAM_ADDR_MAX) {
2846 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2847 (uint64_t)size);
2848 abort();
2851 return offset;
2854 static ram_addr_t last_ram_offset(void)
2856 RAMBlock *block;
2857 ram_addr_t last = 0;
2859 QLIST_FOREACH(block, &ram_list.blocks, next)
2860 last = MAX(last, block->offset + block->length);
2862 return last;
2865 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2867 RAMBlock *new_block, *block;
2869 new_block = NULL;
2870 QLIST_FOREACH(block, &ram_list.blocks, next) {
2871 if (block->offset == addr) {
2872 new_block = block;
2873 break;
2876 assert(new_block);
2877 assert(!new_block->idstr[0]);
2879 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2880 char *id = dev->parent_bus->info->get_dev_path(dev);
2881 if (id) {
2882 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2883 g_free(id);
2886 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2888 QLIST_FOREACH(block, &ram_list.blocks, next) {
2889 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2890 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2891 new_block->idstr);
2892 abort();
2897 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2898 MemoryRegion *mr)
2900 RAMBlock *new_block;
2902 size = TARGET_PAGE_ALIGN(size);
2903 new_block = g_malloc0(sizeof(*new_block));
2905 new_block->mr = mr;
2906 new_block->offset = find_ram_offset(size);
2907 if (host) {
2908 new_block->host = host;
2909 new_block->flags |= RAM_PREALLOC_MASK;
2910 } else {
2911 if (mem_path) {
2912 #if defined (__linux__) && !defined(TARGET_S390X)
2913 new_block->host = file_ram_alloc(new_block, size, mem_path);
2914 if (!new_block->host) {
2915 new_block->host = qemu_vmalloc(size);
2916 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2918 #else
2919 fprintf(stderr, "-mem-path option unsupported\n");
2920 exit(1);
2921 #endif
2922 } else {
2923 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2924 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2925 a system-defined value, which is at least 256GB. Larger systems
2926 have larger values. We put the guest between the end of the data
2927 segment (system break) and this value. We use 32GB as a base to
2928 have enough room for the system break to grow. */
2929 new_block->host = mmap((void*)0x800000000, size,
2930 PROT_EXEC|PROT_READ|PROT_WRITE,
2931 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2932 if (new_block->host == MAP_FAILED) {
2933 fprintf(stderr, "Allocating RAM failed\n");
2934 abort();
2936 #else
2937 if (xen_enabled()) {
2938 xen_ram_alloc(new_block->offset, size, mr);
2939 } else {
2940 new_block->host = qemu_vmalloc(size);
2942 #endif
2943 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2946 new_block->length = size;
2948 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2950 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2951 last_ram_offset() >> TARGET_PAGE_BITS);
2952 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2953 0xff, size >> TARGET_PAGE_BITS);
2955 if (kvm_enabled())
2956 kvm_setup_guest_memory(new_block->host, size);
2958 return new_block->offset;
2961 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2963 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2966 void qemu_ram_free_from_ptr(ram_addr_t addr)
2968 RAMBlock *block;
2970 QLIST_FOREACH(block, &ram_list.blocks, next) {
2971 if (addr == block->offset) {
2972 QLIST_REMOVE(block, next);
2973 g_free(block);
2974 return;
2979 void qemu_ram_free(ram_addr_t addr)
2981 RAMBlock *block;
2983 QLIST_FOREACH(block, &ram_list.blocks, next) {
2984 if (addr == block->offset) {
2985 QLIST_REMOVE(block, next);
2986 if (block->flags & RAM_PREALLOC_MASK) {
2988 } else if (mem_path) {
2989 #if defined (__linux__) && !defined(TARGET_S390X)
2990 if (block->fd) {
2991 munmap(block->host, block->length);
2992 close(block->fd);
2993 } else {
2994 qemu_vfree(block->host);
2996 #else
2997 abort();
2998 #endif
2999 } else {
3000 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3001 munmap(block->host, block->length);
3002 #else
3003 if (xen_enabled()) {
3004 xen_invalidate_map_cache_entry(block->host);
3005 } else {
3006 qemu_vfree(block->host);
3008 #endif
3010 g_free(block);
3011 return;
3017 #ifndef _WIN32
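/* Throw away the current host mapping for part of a RAM block and
   recreate it with the same kind of backing (file-backed or anonymous)
   that the block was originally allocated with; used e.g. to replace a
   mapping that has been poisoned by a hardware memory error. */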
3018 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3020 RAMBlock *block;
3021 ram_addr_t offset;
3022 int flags;
3023 void *area, *vaddr;
3025 QLIST_FOREACH(block, &ram_list.blocks, next) {
3026 offset = addr - block->offset;
3027 if (offset < block->length) {
3028 vaddr = block->host + offset;
3029 if (block->flags & RAM_PREALLOC_MASK) {
3031 } else {
3032 flags = MAP_FIXED;
3033 munmap(vaddr, length);
3034 if (mem_path) {
3035 #if defined(__linux__) && !defined(TARGET_S390X)
3036 if (block->fd) {
3037 #ifdef MAP_POPULATE
3038 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3039 MAP_PRIVATE;
3040 #else
3041 flags |= MAP_PRIVATE;
3042 #endif
3043 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3044 flags, block->fd, offset);
3045 } else {
3046 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3047 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3048 flags, -1, 0);
3050 #else
3051 abort();
3052 #endif
3053 } else {
3054 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3055 flags |= MAP_SHARED | MAP_ANONYMOUS;
3056 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3057 flags, -1, 0);
3058 #else
3059 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3060 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3061 flags, -1, 0);
3062 #endif
3064 if (area != vaddr) {
3065 fprintf(stderr, "Could not remap addr: "
3066 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3067 length, addr);
3068 exit(1);
3070 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3072 return;
3076 #endif /* !_WIN32 */
3078 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3079 With the exception of the softmmu code in this file, this should
3080 only be used for local memory (e.g. video ram) that the device owns,
3081 and knows it isn't going to access beyond the end of the block.
3083 It should not be used for general purpose DMA.
3084 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3086 void *qemu_get_ram_ptr(ram_addr_t addr)
3088 RAMBlock *block;
3090 QLIST_FOREACH(block, &ram_list.blocks, next) {
3091 if (addr - block->offset < block->length) {
3092 /* Move this entry to the start of the list. */
3093 if (block != QLIST_FIRST(&ram_list.blocks)) {
3094 QLIST_REMOVE(block, next);
3095 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3097 if (xen_enabled()) {
3098 /* We need to check whether the requested address is in RAM
3099 * because we don't want to map the entire memory in QEMU.
3100 * In that case, just map until the end of the page.
3102 if (block->offset == 0) {
3103 return xen_map_cache(addr, 0, 0);
3104 } else if (block->host == NULL) {
3105 block->host =
3106 xen_map_cache(block->offset, block->length, 1);
3109 return block->host + (addr - block->offset);
3113 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3114 abort();
3116 return NULL;
3119 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3120 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3122 void *qemu_safe_ram_ptr(ram_addr_t addr)
3124 RAMBlock *block;
3126 QLIST_FOREACH(block, &ram_list.blocks, next) {
3127 if (addr - block->offset < block->length) {
3128 if (xen_enabled()) {
3129 /* We need to check whether the requested address is in RAM
3130 * because we don't want to map the entire memory in QEMU.
3131 * In that case, just map until the end of the page.
3133 if (block->offset == 0) {
3134 return xen_map_cache(addr, 0, 0);
3135 } else if (block->host == NULL) {
3136 block->host =
3137 xen_map_cache(block->offset, block->length, 1);
3140 return block->host + (addr - block->offset);
3144 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3145 abort();
3147 return NULL;
3150 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3151 * but takes a size argument */
3152 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3154 if (*size == 0) {
3155 return NULL;
3157 if (xen_enabled()) {
3158 return xen_map_cache(addr, *size, 1);
3159 } else {
3160 RAMBlock *block;
3162 QLIST_FOREACH(block, &ram_list.blocks, next) {
3163 if (addr - block->offset < block->length) {
3164 if (addr - block->offset + *size > block->length)
3165 *size = block->length - addr + block->offset;
3166 return block->host + (addr - block->offset);
3170 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3171 abort();
3175 void qemu_put_ram_ptr(void *addr)
3177 trace_qemu_put_ram_ptr(addr);
3180 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3182 RAMBlock *block;
3183 uint8_t *host = ptr;
3185 if (xen_enabled()) {
3186 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3187 return 0;
3190 QLIST_FOREACH(block, &ram_list.blocks, next) {
3191 /* This case happens when the block is not mapped. */
3192 if (block->host == NULL) {
3193 continue;
3195 if (host - block->host < block->length) {
3196 *ram_addr = block->offset + (host - block->host);
3197 return 0;
3201 return -1;
3204 /* Some of the softmmu routines need to translate from a host pointer
3205 (typically a TLB entry) back to a ram offset. */
3206 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3208 ram_addr_t ram_addr;
3210 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3211 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3212 abort();
3214 return ram_addr;
3217 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3218 unsigned size)
3220 #ifdef DEBUG_UNASSIGNED
3221 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3222 #endif
3223 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3224 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3225 #endif
3226 return 0;
3229 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3230 uint64_t val, unsigned size)
3232 #ifdef DEBUG_UNASSIGNED
3233 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3234 #endif
3235 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3236 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3237 #endif
3240 static const MemoryRegionOps unassigned_mem_ops = {
3241 .read = unassigned_mem_read,
3242 .write = unassigned_mem_write,
3243 .endianness = DEVICE_NATIVE_ENDIAN,
3246 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3247 unsigned size)
3249 abort();
3252 static void error_mem_write(void *opaque, target_phys_addr_t addr,
3253 uint64_t value, unsigned size)
3255 abort();
3258 static const MemoryRegionOps error_mem_ops = {
3259 .read = error_mem_read,
3260 .write = error_mem_write,
3261 .endianness = DEVICE_NATIVE_ENDIAN,
3264 static const MemoryRegionOps rom_mem_ops = {
3265 .read = error_mem_read,
3266 .write = unassigned_mem_write,
3267 .endianness = DEVICE_NATIVE_ENDIAN,
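/* notdirty is installed as the write handler for RAM pages whose dirty
   bits are clear: the write is performed here after invalidating any TBs
   on the page, and once all dirty flags are set again the TLB entry is
   switched back to a plain RAM mapping. */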
3270 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3271 uint64_t val, unsigned size)
3273 int dirty_flags;
3274 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3275 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3276 #if !defined(CONFIG_USER_ONLY)
3277 tb_invalidate_phys_page_fast(ram_addr, size);
3278 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3279 #endif
3281 switch (size) {
3282 case 1:
3283 stb_p(qemu_get_ram_ptr(ram_addr), val);
3284 break;
3285 case 2:
3286 stw_p(qemu_get_ram_ptr(ram_addr), val);
3287 break;
3288 case 4:
3289 stl_p(qemu_get_ram_ptr(ram_addr), val);
3290 break;
3291 default:
3292 abort();
3294 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3295 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3296 /* we remove the notdirty callback only if the code has been
3297 flushed */
3298 if (dirty_flags == 0xff)
3299 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3302 static const MemoryRegionOps notdirty_mem_ops = {
3303 .read = error_mem_read,
3304 .write = notdirty_mem_write,
3305 .endianness = DEVICE_NATIVE_ENDIAN,
3308 /* Generate a debug exception if a watchpoint has been hit. */
3309 static void check_watchpoint(int offset, int len_mask, int flags)
3311 CPUArchState *env = cpu_single_env;
3312 target_ulong pc, cs_base;
3313 TranslationBlock *tb;
3314 target_ulong vaddr;
3315 CPUWatchpoint *wp;
3316 int cpu_flags;
3318 if (env->watchpoint_hit) {
3319 /* We re-entered the check after replacing the TB. Now raise
3320 * the debug interrupt so that it will trigger after the
3321 * current instruction. */
3322 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3323 return;
3325 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3326 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3327 if ((vaddr == (wp->vaddr & len_mask) ||
3328 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3329 wp->flags |= BP_WATCHPOINT_HIT;
3330 if (!env->watchpoint_hit) {
3331 env->watchpoint_hit = wp;
3332 tb = tb_find_pc(env->mem_io_pc);
3333 if (!tb) {
3334 cpu_abort(env, "check_watchpoint: could not find TB for "
3335 "pc=%p", (void *)env->mem_io_pc);
3337 cpu_restore_state(tb, env, env->mem_io_pc);
3338 tb_phys_invalidate(tb, -1);
3339 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3340 env->exception_index = EXCP_DEBUG;
3341 cpu_loop_exit(env);
3342 } else {
3343 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3344 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3345 cpu_resume_from_signal(env, NULL);
3348 } else {
3349 wp->flags &= ~BP_WATCHPOINT_HIT;
3354 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3355 so these check for a hit then pass through to the normal out-of-line
3356 phys routines. */
3357 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3358 unsigned size)
3360 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3361 switch (size) {
3362 case 1: return ldub_phys(addr);
3363 case 2: return lduw_phys(addr);
3364 case 4: return ldl_phys(addr);
3365 default: abort();
3369 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3370 uint64_t val, unsigned size)
3372 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3373 switch (size) {
3374 case 1:
3375 stb_phys(addr, val);
3376 break;
3377 case 2:
3378 stw_phys(addr, val);
3379 break;
3380 case 4:
3381 stl_phys(addr, val);
3382 break;
3383 default: abort();
3387 static const MemoryRegionOps watch_mem_ops = {
3388 .read = watch_mem_read,
3389 .write = watch_mem_write,
3390 .endianness = DEVICE_NATIVE_ENDIAN,
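/* Subpage accesses look up the MemoryRegionSection registered for this
   byte offset within the page and forward the access, rebasing the
   address from the subpage into the target region. */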
3393 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3394 unsigned len)
3396 subpage_t *mmio = opaque;
3397 unsigned int idx = SUBPAGE_IDX(addr);
3398 MemoryRegionSection *section;
3399 #if defined(DEBUG_SUBPAGE)
3400 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3401 mmio, len, addr, idx);
3402 #endif
3404 section = &phys_sections[mmio->sub_section[idx]];
3405 addr += mmio->base;
3406 addr -= section->offset_within_address_space;
3407 addr += section->offset_within_region;
3408 return io_mem_read(section->mr, addr, len);
3411 static void subpage_write(void *opaque, target_phys_addr_t addr,
3412 uint64_t value, unsigned len)
3414 subpage_t *mmio = opaque;
3415 unsigned int idx = SUBPAGE_IDX(addr);
3416 MemoryRegionSection *section;
3417 #if defined(DEBUG_SUBPAGE)
3418 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3419 " idx %d value %"PRIx64"\n",
3420 __func__, mmio, len, addr, idx, value);
3421 #endif
3423 section = &phys_sections[mmio->sub_section[idx]];
3424 addr += mmio->base;
3425 addr -= section->offset_within_address_space;
3426 addr += section->offset_within_region;
3427 io_mem_write(section->mr, addr, value, len);
3430 static const MemoryRegionOps subpage_ops = {
3431 .read = subpage_read,
3432 .write = subpage_write,
3433 .endianness = DEVICE_NATIVE_ENDIAN,
3436 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3437 unsigned size)
3439 ram_addr_t raddr = addr;
3440 void *ptr = qemu_get_ram_ptr(raddr);
3441 switch (size) {
3442 case 1: return ldub_p(ptr);
3443 case 2: return lduw_p(ptr);
3444 case 4: return ldl_p(ptr);
3445 default: abort();
3449 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3450 uint64_t value, unsigned size)
3452 ram_addr_t raddr = addr;
3453 void *ptr = qemu_get_ram_ptr(raddr);
3454 switch (size) {
3455 case 1: return stb_p(ptr, value);
3456 case 2: return stw_p(ptr, value);
3457 case 4: return stl_p(ptr, value);
3458 default: abort();
3462 static const MemoryRegionOps subpage_ram_ops = {
3463 .read = subpage_ram_read,
3464 .write = subpage_ram_write,
3465 .endianness = DEVICE_NATIVE_ENDIAN,
3468 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3469 uint16_t section)
3471 int idx, eidx;
3473 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3474 return -1;
3475 idx = SUBPAGE_IDX(start);
3476 eidx = SUBPAGE_IDX(end);
3477 #if defined(DEBUG_SUBPAGE)
3478 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3479 mmio, start, end, idx, eidx, memory);
3480 #endif
3481 if (memory_region_is_ram(phys_sections[section].mr)) {
3482 MemoryRegionSection new_section = phys_sections[section];
3483 new_section.mr = &io_mem_subpage_ram;
3484 section = phys_section_add(&new_section);
3486 for (; idx <= eidx; idx++) {
3487 mmio->sub_section[idx] = section;
3490 return 0;
3493 static subpage_t *subpage_init(target_phys_addr_t base)
3495 subpage_t *mmio;
3497 mmio = g_malloc0(sizeof(subpage_t));
3499 mmio->base = base;
3500 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3501 "subpage", TARGET_PAGE_SIZE);
3502 mmio->iomem.subpage = true;
3503 #if defined(DEBUG_SUBPAGE)
3504 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3505 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3506 #endif
3507 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3509 return mmio;
3512 static uint16_t dummy_section(MemoryRegion *mr)
3514 MemoryRegionSection section = {
3515 .mr = mr,
3516 .offset_within_address_space = 0,
3517 .offset_within_region = 0,
3518 .size = UINT64_MAX,
3521 return phys_section_add(&section);
3524 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3526 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3529 static void io_mem_init(void)
3531 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3532 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3533 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3534 "unassigned", UINT64_MAX);
3535 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3536 "notdirty", UINT64_MAX);
3537 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3538 "subpage-ram", UINT64_MAX);
3539 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3540 "watch", UINT64_MAX);
3543 static void core_begin(MemoryListener *listener)
3545 destroy_all_mappings();
3546 phys_sections_clear();
3547 phys_map.ptr = PHYS_MAP_NODE_NIL;
3548 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3549 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3550 phys_section_rom = dummy_section(&io_mem_rom);
3551 phys_section_watch = dummy_section(&io_mem_watch);
3554 static void core_commit(MemoryListener *listener)
3556 CPUArchState *env;
3558 /* since each CPU stores ram addresses in its TLB cache, we must
3559 reset the modified entries */
3560 /* XXX: slow! */
3561 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3562 tlb_flush(env, 1);
3566 static void core_region_add(MemoryListener *listener,
3567 MemoryRegionSection *section)
3569 cpu_register_physical_memory_log(section, section->readonly);
3572 static void core_region_del(MemoryListener *listener,
3573 MemoryRegionSection *section)
3577 static void core_region_nop(MemoryListener *listener,
3578 MemoryRegionSection *section)
3580 cpu_register_physical_memory_log(section, section->readonly);
3583 static void core_log_start(MemoryListener *listener,
3584 MemoryRegionSection *section)
3588 static void core_log_stop(MemoryListener *listener,
3589 MemoryRegionSection *section)
3593 static void core_log_sync(MemoryListener *listener,
3594 MemoryRegionSection *section)
3598 static void core_log_global_start(MemoryListener *listener)
3600 cpu_physical_memory_set_dirty_tracking(1);
3603 static void core_log_global_stop(MemoryListener *listener)
3605 cpu_physical_memory_set_dirty_tracking(0);
3608 static void core_eventfd_add(MemoryListener *listener,
3609 MemoryRegionSection *section,
3610 bool match_data, uint64_t data, int fd)
3614 static void core_eventfd_del(MemoryListener *listener,
3615 MemoryRegionSection *section,
3616 bool match_data, uint64_t data, int fd)
3620 static void io_begin(MemoryListener *listener)
3624 static void io_commit(MemoryListener *listener)
3628 static void io_region_add(MemoryListener *listener,
3629 MemoryRegionSection *section)
3631 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3633 mrio->mr = section->mr;
3634 mrio->offset = section->offset_within_region;
3635 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3636 section->offset_within_address_space, section->size);
3637 ioport_register(&mrio->iorange);
3640 static void io_region_del(MemoryListener *listener,
3641 MemoryRegionSection *section)
3643 isa_unassign_ioport(section->offset_within_address_space, section->size);
3646 static void io_region_nop(MemoryListener *listener,
3647 MemoryRegionSection *section)
3651 static void io_log_start(MemoryListener *listener,
3652 MemoryRegionSection *section)
3656 static void io_log_stop(MemoryListener *listener,
3657 MemoryRegionSection *section)
3661 static void io_log_sync(MemoryListener *listener,
3662 MemoryRegionSection *section)
3666 static void io_log_global_start(MemoryListener *listener)
3670 static void io_log_global_stop(MemoryListener *listener)
3674 static void io_eventfd_add(MemoryListener *listener,
3675 MemoryRegionSection *section,
3676 bool match_data, uint64_t data, int fd)
3680 static void io_eventfd_del(MemoryListener *listener,
3681 MemoryRegionSection *section,
3682 bool match_data, uint64_t data, int fd)
3686 static MemoryListener core_memory_listener = {
3687 .begin = core_begin,
3688 .commit = core_commit,
3689 .region_add = core_region_add,
3690 .region_del = core_region_del,
3691 .region_nop = core_region_nop,
3692 .log_start = core_log_start,
3693 .log_stop = core_log_stop,
3694 .log_sync = core_log_sync,
3695 .log_global_start = core_log_global_start,
3696 .log_global_stop = core_log_global_stop,
3697 .eventfd_add = core_eventfd_add,
3698 .eventfd_del = core_eventfd_del,
3699 .priority = 0,
3702 static MemoryListener io_memory_listener = {
3703 .begin = io_begin,
3704 .commit = io_commit,
3705 .region_add = io_region_add,
3706 .region_del = io_region_del,
3707 .region_nop = io_region_nop,
3708 .log_start = io_log_start,
3709 .log_stop = io_log_stop,
3710 .log_sync = io_log_sync,
3711 .log_global_start = io_log_global_start,
3712 .log_global_stop = io_log_global_stop,
3713 .eventfd_add = io_eventfd_add,
3714 .eventfd_del = io_eventfd_del,
3715 .priority = 0,
3718 static void memory_map_init(void)
3720 system_memory = g_malloc(sizeof(*system_memory));
3721 memory_region_init(system_memory, "system", INT64_MAX);
3722 set_system_memory_map(system_memory);
3724 system_io = g_malloc(sizeof(*system_io));
3725 memory_region_init(system_io, "io", 65536);
3726 set_system_io_map(system_io);
3728 memory_listener_register(&core_memory_listener, system_memory);
3729 memory_listener_register(&io_memory_listener, system_io);
3732 MemoryRegion *get_system_memory(void)
3734 return system_memory;
3737 MemoryRegion *get_system_io(void)
3739 return system_io;
3742 #endif /* !defined(CONFIG_USER_ONLY) */
3744 /* physical memory access (slow version, mainly for debug) */
3745 #if defined(CONFIG_USER_ONLY)
3746 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3747 uint8_t *buf, int len, int is_write)
3749 int l, flags;
3750 target_ulong page;
3751 void * p;
3753 while (len > 0) {
3754 page = addr & TARGET_PAGE_MASK;
3755 l = (page + TARGET_PAGE_SIZE) - addr;
3756 if (l > len)
3757 l = len;
3758 flags = page_get_flags(page);
3759 if (!(flags & PAGE_VALID))
3760 return -1;
3761 if (is_write) {
3762 if (!(flags & PAGE_WRITE))
3763 return -1;
3764 /* XXX: this code should not depend on lock_user */
3765 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3766 return -1;
3767 memcpy(p, buf, l);
3768 unlock_user(p, addr, l);
3769 } else {
3770 if (!(flags & PAGE_READ))
3771 return -1;
3772 /* XXX: this code should not depend on lock_user */
3773 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3774 return -1;
3775 memcpy(buf, p, l);
3776 unlock_user(p, addr, 0);
3778 len -= l;
3779 buf += l;
3780 addr += l;
3782 return 0;
3785 #else
3786 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3787 int len, int is_write)
3789 int l;
3790 uint8_t *ptr;
3791 uint32_t val;
3792 target_phys_addr_t page;
3793 MemoryRegionSection *section;
3795 while (len > 0) {
3796 page = addr & TARGET_PAGE_MASK;
3797 l = (page + TARGET_PAGE_SIZE) - addr;
3798 if (l > len)
3799 l = len;
3800 section = phys_page_find(page >> TARGET_PAGE_BITS);
3802 if (is_write) {
3803 if (!memory_region_is_ram(section->mr)) {
3804 target_phys_addr_t addr1;
3805 addr1 = section_addr(section, addr);
3806 /* XXX: could force cpu_single_env to NULL to avoid
3807 potential bugs */
3808 if (l >= 4 && ((addr1 & 3) == 0)) {
3809 /* 32 bit write access */
3810 val = ldl_p(buf);
3811 io_mem_write(section->mr, addr1, val, 4);
3812 l = 4;
3813 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3814 /* 16 bit write access */
3815 val = lduw_p(buf);
3816 io_mem_write(section->mr, addr1, val, 2);
3817 l = 2;
3818 } else {
3819 /* 8 bit write access */
3820 val = ldub_p(buf);
3821 io_mem_write(section->mr, addr1, val, 1);
3822 l = 1;
3824 } else if (!section->readonly) {
3825 ram_addr_t addr1;
3826 addr1 = memory_region_get_ram_addr(section->mr)
3827 + section_addr(section, addr);
3828 /* RAM case */
3829 ptr = qemu_get_ram_ptr(addr1);
3830 memcpy(ptr, buf, l);
3831 if (!cpu_physical_memory_is_dirty(addr1)) {
3832 /* invalidate code */
3833 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3834 /* set dirty bit */
3835 cpu_physical_memory_set_dirty_flags(
3836 addr1, (0xff & ~CODE_DIRTY_FLAG));
3838 qemu_put_ram_ptr(ptr);
3840 } else {
3841 if (!is_ram_rom_romd(section)) {
3842 target_phys_addr_t addr1;
3843 /* I/O case */
3844 addr1 = section_addr(section, addr);
3845 if (l >= 4 && ((addr1 & 3) == 0)) {
3846 /* 32 bit read access */
3847 val = io_mem_read(section->mr, addr1, 4);
3848 stl_p(buf, val);
3849 l = 4;
3850 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3851 /* 16 bit read access */
3852 val = io_mem_read(section->mr, addr1, 2);
3853 stw_p(buf, val);
3854 l = 2;
3855 } else {
3856 /* 8 bit read access */
3857 val = io_mem_read(section->mr, addr1, 1);
3858 stb_p(buf, val);
3859 l = 1;
3861 } else {
3862 /* RAM case */
3863 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3864 + section_addr(section, addr));
3865 memcpy(buf, ptr, l);
3866 qemu_put_ram_ptr(ptr);
3869 len -= l;
3870 buf += l;
3871 addr += l;
3875 /* used for ROM loading : can write in RAM and ROM */
3876 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3877 const uint8_t *buf, int len)
3879 int l;
3880 uint8_t *ptr;
3881 target_phys_addr_t page;
3882 MemoryRegionSection *section;
3884 while (len > 0) {
3885 page = addr & TARGET_PAGE_MASK;
3886 l = (page + TARGET_PAGE_SIZE) - addr;
3887 if (l > len)
3888 l = len;
3889 section = phys_page_find(page >> TARGET_PAGE_BITS);
3891 if (!is_ram_rom_romd(section)) {
3892 /* do nothing */
3893 } else {
3894 unsigned long addr1;
3895 addr1 = memory_region_get_ram_addr(section->mr)
3896 + section_addr(section, addr);
3897 /* ROM/RAM case */
3898 ptr = qemu_get_ram_ptr(addr1);
3899 memcpy(ptr, buf, l);
3900 qemu_put_ram_ptr(ptr);
3902 len -= l;
3903 buf += l;
3904 addr += l;
3908 typedef struct {
3909 void *buffer;
3910 target_phys_addr_t addr;
3911 target_phys_addr_t len;
3912 } BounceBuffer;
3914 static BounceBuffer bounce;
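/* A single static bounce buffer backs cpu_physical_memory_map() for
   regions that cannot be mapped directly (e.g. MMIO); only one such
   bounced mapping can be outstanding at a time, and map clients are
   notified when it is released. */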
3916 typedef struct MapClient {
3917 void *opaque;
3918 void (*callback)(void *opaque);
3919 QLIST_ENTRY(MapClient) link;
3920 } MapClient;
3922 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3923 = QLIST_HEAD_INITIALIZER(map_client_list);
3925 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3927 MapClient *client = g_malloc(sizeof(*client));
3929 client->opaque = opaque;
3930 client->callback = callback;
3931 QLIST_INSERT_HEAD(&map_client_list, client, link);
3932 return client;
3935 void cpu_unregister_map_client(void *_client)
3937 MapClient *client = (MapClient *)_client;
3939 QLIST_REMOVE(client, link);
3940 g_free(client);
3943 static void cpu_notify_map_clients(void)
3945 MapClient *client;
3947 while (!QLIST_EMPTY(&map_client_list)) {
3948 client = QLIST_FIRST(&map_client_list);
3949 client->callback(client->opaque);
3950 cpu_unregister_map_client(client);
3954 /* Map a physical memory region into a host virtual address.
3955 * May map a subset of the requested range, given by and returned in *plen.
3956 * May return NULL if resources needed to perform the mapping are exhausted.
3957 * Use only for reads OR writes - not for read-modify-write operations.
3958 * Use cpu_register_map_client() to know when retrying the map operation is
3959 * likely to succeed.
3961 void *cpu_physical_memory_map(target_phys_addr_t addr,
3962 target_phys_addr_t *plen,
3963 int is_write)
3965 target_phys_addr_t len = *plen;
3966 target_phys_addr_t todo = 0;
3967 int l;
3968 target_phys_addr_t page;
3969 MemoryRegionSection *section;
3970 ram_addr_t raddr = RAM_ADDR_MAX;
3971 ram_addr_t rlen;
3972 void *ret;
3974 while (len > 0) {
3975 page = addr & TARGET_PAGE_MASK;
3976 l = (page + TARGET_PAGE_SIZE) - addr;
3977 if (l > len)
3978 l = len;
3979 section = phys_page_find(page >> TARGET_PAGE_BITS);
3981 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3982 if (todo || bounce.buffer) {
3983 break;
3985 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3986 bounce.addr = addr;
3987 bounce.len = l;
3988 if (!is_write) {
3989 cpu_physical_memory_read(addr, bounce.buffer, l);
3992 *plen = l;
3993 return bounce.buffer;
3995 if (!todo) {
3996 raddr = memory_region_get_ram_addr(section->mr)
3997 + section_addr(section, addr);
4000 len -= l;
4001 addr += l;
4002 todo += l;
4004 rlen = todo;
4005 ret = qemu_ram_ptr_length(raddr, &rlen);
4006 *plen = rlen;
4007 return ret;
4010 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4011 * Will also mark the memory as dirty if is_write == 1. access_len gives
4012 * the amount of memory that was actually read or written by the caller.
4014 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4015 int is_write, target_phys_addr_t access_len)
4017 if (buffer != bounce.buffer) {
4018 if (is_write) {
4019 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4020 while (access_len) {
4021 unsigned l;
4022 l = TARGET_PAGE_SIZE;
4023 if (l > access_len)
4024 l = access_len;
4025 if (!cpu_physical_memory_is_dirty(addr1)) {
4026 /* invalidate code */
4027 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4028 /* set dirty bit */
4029 cpu_physical_memory_set_dirty_flags(
4030 addr1, (0xff & ~CODE_DIRTY_FLAG));
4032 addr1 += l;
4033 access_len -= l;
4036 if (xen_enabled()) {
4037 xen_invalidate_map_cache_entry(buffer);
4039 return;
4041 if (is_write) {
4042 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4044 qemu_vfree(bounce.buffer);
4045 bounce.buffer = NULL;
4046 cpu_notify_map_clients();
4049 /* warning: addr must be aligned */
4050 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4051 enum device_endian endian)
4053 uint8_t *ptr;
4054 uint32_t val;
4055 MemoryRegionSection *section;
4057 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4059 if (!is_ram_rom_romd(section)) {
4060 /* I/O case */
4061 addr = section_addr(section, addr);
4062 val = io_mem_read(section->mr, addr, 4);
4063 #if defined(TARGET_WORDS_BIGENDIAN)
4064 if (endian == DEVICE_LITTLE_ENDIAN) {
4065 val = bswap32(val);
4067 #else
4068 if (endian == DEVICE_BIG_ENDIAN) {
4069 val = bswap32(val);
4071 #endif
4072 } else {
4073 /* RAM case */
4074 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4075 & TARGET_PAGE_MASK)
4076 + section_addr(section, addr));
4077 switch (endian) {
4078 case DEVICE_LITTLE_ENDIAN:
4079 val = ldl_le_p(ptr);
4080 break;
4081 case DEVICE_BIG_ENDIAN:
4082 val = ldl_be_p(ptr);
4083 break;
4084 default:
4085 val = ldl_p(ptr);
4086 break;
4089 return val;
4092 uint32_t ldl_phys(target_phys_addr_t addr)
4094 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4097 uint32_t ldl_le_phys(target_phys_addr_t addr)
4099 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4102 uint32_t ldl_be_phys(target_phys_addr_t addr)
4104 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4107 /* warning: addr must be aligned */
4108 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4109 enum device_endian endian)
4111 uint8_t *ptr;
4112 uint64_t val;
4113 MemoryRegionSection *section;
4115 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4117 if (!is_ram_rom_romd(section)) {
4118 /* I/O case */
4119 addr = section_addr(section, addr);
4121 /* XXX This is broken when device endian != cpu endian.
4122 Fix and add "endian" variable check */
4123 #ifdef TARGET_WORDS_BIGENDIAN
4124 val = io_mem_read(section->mr, addr, 4) << 32;
4125 val |= io_mem_read(section->mr, addr + 4, 4);
4126 #else
4127 val = io_mem_read(section->mr, addr, 4);
4128 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
4129 #endif
4130 } else {
4131 /* RAM case */
4132 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4133 & TARGET_PAGE_MASK)
4134 + section_addr(section, addr));
4135 switch (endian) {
4136 case DEVICE_LITTLE_ENDIAN:
4137 val = ldq_le_p(ptr);
4138 break;
4139 case DEVICE_BIG_ENDIAN:
4140 val = ldq_be_p(ptr);
4141 break;
4142 default:
4143 val = ldq_p(ptr);
4144 break;
4147 return val;
4150 uint64_t ldq_phys(target_phys_addr_t addr)
4152 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4155 uint64_t ldq_le_phys(target_phys_addr_t addr)
4157 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4160 uint64_t ldq_be_phys(target_phys_addr_t addr)
4162 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4165 /* XXX: optimize */
4166 uint32_t ldub_phys(target_phys_addr_t addr)
4168 uint8_t val;
4169 cpu_physical_memory_read(addr, &val, 1);
4170 return val;
4173 /* warning: addr must be aligned */
4174 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4175 enum device_endian endian)
4177 uint8_t *ptr;
4178 uint64_t val;
4179 MemoryRegionSection *section;
4181 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4183 if (!is_ram_rom_romd(section)) {
4184 /* I/O case */
4185 addr = section_addr(section, addr);
4186 val = io_mem_read(section->mr, addr, 2);
4187 #if defined(TARGET_WORDS_BIGENDIAN)
4188 if (endian == DEVICE_LITTLE_ENDIAN) {
4189 val = bswap16(val);
4191 #else
4192 if (endian == DEVICE_BIG_ENDIAN) {
4193 val = bswap16(val);
4195 #endif
4196 } else {
4197 /* RAM case */
4198 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4199 & TARGET_PAGE_MASK)
4200 + section_addr(section, addr));
4201 switch (endian) {
4202 case DEVICE_LITTLE_ENDIAN:
4203 val = lduw_le_p(ptr);
4204 break;
4205 case DEVICE_BIG_ENDIAN:
4206 val = lduw_be_p(ptr);
4207 break;
4208 default:
4209 val = lduw_p(ptr);
4210 break;
4213 return val;
4216 uint32_t lduw_phys(target_phys_addr_t addr)
4218 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4221 uint32_t lduw_le_phys(target_phys_addr_t addr)
4223 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4226 uint32_t lduw_be_phys(target_phys_addr_t addr)
4228 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4231 /* warning: addr must be aligned. The ram page is not marked as dirty
4232 and the code inside is not invalidated. It is useful if the dirty
4233 bits are used to track modified PTEs */
4234 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4236 uint8_t *ptr;
4237 MemoryRegionSection *section;
4239 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4241 if (!memory_region_is_ram(section->mr) || section->readonly) {
4242 addr = section_addr(section, addr);
4243 if (memory_region_is_ram(section->mr)) {
4244 section = &phys_sections[phys_section_rom];
4246 io_mem_write(section->mr, addr, val, 4);
4247 } else {
4248 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
4249 & TARGET_PAGE_MASK)
4250 + section_addr(section, addr);
4251 ptr = qemu_get_ram_ptr(addr1);
4252 stl_p(ptr, val);
4254 if (unlikely(in_migration)) {
4255 if (!cpu_physical_memory_is_dirty(addr1)) {
4256 /* invalidate code */
4257 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4258 /* set dirty bit */
4259 cpu_physical_memory_set_dirty_flags(
4260 addr1, (0xff & ~CODE_DIRTY_FLAG));
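#if 0
/* Illustrative sketch (not built): the typical caller of stl_phys_notdirty()
 * is target MMU emulation updating a guest page table entry in place.
 * The PTE address, the accessed-bit mask and the helper name below are
 * hypothetical. */
static void example_set_pte_accessed(target_phys_addr_t pte_addr, uint32_t pte)
{
    /* Skip dirty tracking and TB invalidation on purpose: this write is
       bookkeeping by the emulated MMU, not guest self-modifying code. */
    stl_phys_notdirty(pte_addr, pte | 0x20 /* hypothetical ACCESSED bit */);
}
#endif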
void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + section_addr(section, addr));
        stq_p(ptr, val);
    }
}
/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
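#if 0
/* Illustrative sketch (not built): because stl_phys() checks the dirty
 * bitmap and invalidates overlapping TBs, a DMA-style word write from a
 * device model stays coherent with translated code.  The destination,
 * value and helper name are hypothetical. */
static void example_dma_word_write(target_phys_addr_t dst, uint32_t word)
{
    /* If dst lies in a page holding translated code, stl_phys() calls
       tb_invalidate_phys_page_range() before setting the dirty flags. */
    stl_phys(dst, word);
}
#endif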
/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}
/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
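#if 0
/* Illustrative sketch (not built): the byte order stq_le_phys() produces in
 * guest memory, regardless of host endianness.  The constant and helper
 * name are arbitrary. */
static void example_store_le64(target_phys_addr_t addr)
{
    /* cpu_to_le64() makes the in-memory layout
       ef cd ab 89 67 45 23 01 (least-significant byte first). */
    stq_le_phys(addr, 0x0123456789abcdefULL);
}
#endif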
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif
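#if 0
/* Illustrative sketch (not built): how a debugger front end (the gdbstub is
 * the usual caller) reads guest virtual memory through the debug path,
 * which translates each page with cpu_get_phys_page_debug() and may cross
 * page boundaries.  The wrapper name is hypothetical. */
static int example_debug_peek(CPUArchState *env, target_ulong vaddr,
                              uint8_t *buf, int len)
{
    return cpu_memory_rw_debug(env, vaddr, buf, len, 0 /* read */);
}
#endif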
/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}
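#if 0
/* Illustrative sketch (not built): the cflags value built above packs the
 * instruction count into the low bits and ORs in CF_LAST_IO so the
 * retranslated TB ends on the I/O instruction.  The helper name is
 * hypothetical. */
static uint32_t example_io_cflags(uint32_t insns_before_io)
{
    uint32_t n = insns_before_io + 1;   /* include the I/O insn itself */
    assert(n <= CF_COUNT_MASK);         /* mirrors the check above */
    return n | CF_LAST_IO;
}
#endif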
#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}
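#if 0
/* Illustrative sketch (not built): the "expansion ratio" printed above is
 * generated-host-code bytes divided by guest-code bytes; the monitor's
 * "info jit" command is the usual way to reach dump_exec_info().  The
 * helper name is hypothetical. */
static double example_expansion_ratio(long host_bytes, long guest_bytes)
{
    return guest_bytes ? (double)host_bytes / guest_bytes : 0.0;
}
#endif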
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
   is the offset relative to phys_ram_base */
tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
{
    int mmu_idx, page_index, pd;
    void *p;
    MemoryRegion *mr;

    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env1);
    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                 (addr & TARGET_PAGE_MASK))) {
#ifdef CONFIG_TCG_PASS_AREG0
        cpu_ldub_code(env1, addr);
#else
        ldub_code(addr);
#endif
    }
    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
    mr = iotlb_to_region(pd);
    if (mr != &io_mem_ram && mr != &io_mem_rom
        && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch) {
#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
        cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
#else
        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
#endif
    }
    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
    return qemu_ram_addr_from_host_nofail(p);
}
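#if 0
/* Illustrative sketch (not built): how get_page_addr_code() picks its
 * direct-mapped TLB slot from a guest virtual address.  The helper name is
 * hypothetical. */
static int example_tlb_index(target_ulong addr)
{
    /* One slot per page, wrapped to the TLB size (a power of two). */
    return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
}
#endif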
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
#define MMUSUFFIX _cmmu
#undef GETPC
#define GETPC() ((uintptr_t)0)
#define env cpu_single_env
#define SOFTMMU_CODE_ACCESS

#define SHIFT 0
#include "softmmu_template.h"

#define SHIFT 1
#include "softmmu_template.h"

#define SHIFT 2
#include "softmmu_template.h"

#define SHIFT 3
#include "softmmu_template.h"

#undef env

#endif
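#if 0
/* Illustrative sketch (not built): each softmmu_template.h inclusion above
 * instantiates a code-access load helper for one access size, with SHIFT
 * selecting 1 << SHIFT bytes (0 -> byte, 1 -> word, 2 -> long, 3 -> quad).
 * The helper name is hypothetical. */
static int example_access_size(int shift)
{
    return 1 << shift;   /* bytes accessed by the generated helper */
}
#endif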