memory: store section indices in iotlb instead of io indices
[qemu/ar7.git] / exec.c
blob a35eb4f8bf3ff6f9eb346941858212491b806c50
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "osdep.h"
#include "kvm.h"
#include "hw/xen.h"
#include "qemu-timer.h"
#include "memory.h"
#include "exec-memory.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/time.h>
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else /* !CONFIG_USER_ONLY */
#include "xen-mapcache.h"
#include "trace.h"
#endif

#define WANT_EXEC_OBSOLETE
#include "exec-obsolete.h"
//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_TLB
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK
//#define DEBUG_TLB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif
#define SMC_BITMAP_USE_THRESHOLD 10

static TranslationBlock *tbs;
static int code_gen_max_blocks;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;

#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
 have limited branch ranges (possibly also PPC) so place it in a
 section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32)
/* Maximum alignment for Win32 is 16. */
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

uint8_t code_gen_prologue[1024] code_gen_section;
static uint8_t *code_gen_buffer;
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
static uint8_t *code_gen_ptr;
#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif

CPUState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount = 0;
typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned int code_write_count;
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    unsigned long flags;
#endif
} PageDesc;

/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#endif
/* Size of the L2 (and L3, etc) page tables.  */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

#define P_L2_LEVELS \
    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)

/* The bits remaining after N lower levels of page tables.  */
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
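
/* Added worked example: with L1_MAP_ADDR_SPACE_BITS = 36 and
 * TARGET_PAGE_BITS = 12, 24 bits of page index remain.  V_L1_BITS_REM is
 * 24 % 10 = 4, which meets the minimum, so V_L1_BITS = 4, V_L1_SIZE = 16
 * and V_L1_SHIFT = 20: the top 4 bits pick an l1_map slot and two 10-bit
 * levels below it reach the PageDesc.  Had the remainder been under 4,
 * L2_BITS would be folded in to avoid a degenerate top level. */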
unsigned long qemu_real_host_page_size;
unsigned long qemu_host_page_size;
unsigned long qemu_host_page_mask;

/* This is a multi-level map on the virtual address space.
   The bottom level has pointers to PageDesc.  */
static void *l1_map[V_L1_SIZE];
#if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

struct PhysPageEntry {
    uint16_t is_leaf : 1;
    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

/* This is a multi-level map on the physical address space.
   The bottom level has pointers to MemoryRegionSections.  */
static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
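
/* Added note: an address lookup walks at most P_L2_LEVELS interior nodes,
 * consuming L2_BITS of the page index per level, and stops at the first
 * leaf entry, whose ptr field indexes phys_sections[].  Per the commit
 * subject, it is these section indices (rather than io indices) that are
 * also stored in the iotlb. */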
static void io_mem_init(void);
static void memory_map_init(void);

/* io memory support */
MemoryRegion *io_mem_region[IO_MEM_NB_ENTRIES];
static char io_mem_used[IO_MEM_NB_ENTRIES];
static MemoryRegion io_mem_watch;
#endif

/* log support */
#ifdef WIN32
static const char *logfilename = "qemu.log";
#else
static const char *logfilename = "/tmp/qemu.log";
#endif
FILE *logfile;
int loglevel;
static int log_append = 0;

/* statistics */
#if !defined(CONFIG_USER_ONLY)
static int tlb_flush_count;
#endif
static int tb_flush_count;
static int tb_phys_invalidate_count;

#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);
}
#else
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

#if defined(CONFIG_USER_ONLY)
    /* We can't use g_malloc because it may recurse into a locked mutex. */
# define ALLOC(P, SIZE)                                 \
    do {                                                \
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
    } while (0)
#else
# define ALLOC(P, SIZE) \
    do { P = g_malloc0(SIZE); } while (0)
#endif

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;

        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            ALLOC(p, sizeof(void *) * L2_SIZE);
            *lp = p;
        }

        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    pd = *lp;
    if (pd == NULL) {
        if (!alloc) {
            return NULL;
        }
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
        *lp = pd;
    }

#undef ALLOC

    return pd + (index & (L2_SIZE - 1));
}

static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}
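
/* Added usage note: callers index by page number, e.g.
 *
 *     PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);
 *
 * which returns NULL when no TB has ever touched that page, since the
 * non-allocating variant bails out at the first missing level. */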
#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}
static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
                                target_phys_addr_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
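
/* Added illustrative example: registering a 16-page region for section
 * index 5 (a hypothetical value) with
 *
 *     phys_page_set(start >> TARGET_PAGE_BITS, 16, 5);
 *
 * descends the tree; any run that is aligned to and at least as large as
 * 2^(level * L2_BITS) pages is recorded as a single leaf at that level,
 * so large regions do not fan out into bottom-level nodes. */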
static MemoryRegionSection *phys_page_find(target_phys_addr_t index)
{
    PhysPageEntry lp = phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}
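
/* Added note: a lookup never fails outright -- any miss falls through with
 * s_index still set to phys_section_unassigned, so callers always receive
 * a valid MemoryRegionSection pointer. */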
static target_phys_addr_t section_addr(MemoryRegionSection *section,
                                       target_phys_addr_t addr)
{
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    return addr;
}

static void tlb_protect_code(ram_addr_t ram_addr);
static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
                                    target_ulong vaddr);
#define mmap_lock() do { } while(0)
#define mmap_unlock() do { } while(0)
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. This will change once a dedicated libc is used. */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif
static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        start = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        addr = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}
/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    code_gen_ptr = code_gen_buffer;
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now. */
    tcg_prologue_init(&tcg_ctx);
#endif
}

bool tcg_enabled(void)
{
    return code_gen_buffer != NULL;
}

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
#endif
CPUState *qemu_get_cpu(int cpu)
{
    CPUState *env = first_cpu;

    while (env) {
        if (env->cpu_index == cpu)
            break;
        env = env->next_cpu;
    }

    return env;
}

void cpu_exec_init(CPUState *env)
{
    CPUState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}
/* Allocate a new translation block. Flush the translation buffer if
   too many translation blocks or too much generated code. */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    if (nb_tbs >= code_gen_max_blocks ||
        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
        return NULL;
    tb = &tbs[nb_tbs++];
    tb->pc = pc;
    tb->cflags = 0;
    return tb;
}

void tb_free(TranslationBlock *tb)
{
    /* In practice this is mostly used for single-use temporary TBs.
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated. */
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
        code_gen_ptr = tb->tc_ptr;
        nb_tbs--;
    }
}

static inline void invalidate_page_bitmap(PageDesc *p)
{
    if (p->code_bitmap) {
        g_free(p->code_bitmap);
        p->code_bitmap = NULL;
    }
    p->code_write_count = 0;
}
/* Set to NULL all the 'first_tb' fields in all PageDescs. */

static void page_flush_tb_1 (int level, void **lp)
{
    int i;

    if (*lp == NULL) {
        return;
    }
    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pd[i].first_tb = NULL;
            invalidate_page_bitmap(pd + i);
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            page_flush_tb_1 (level - 1, pp + i);
        }
    }
}

static void page_flush_tb(void)
{
    int i;
    for (i = 0; i < V_L1_SIZE; i++) {
        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
    }
}

/* flush all the translation blocks */
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUState *env1)
{
    CPUState *env;
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(code_gen_ptr - code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
#endif
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
        cpu_abort(env1, "Internal error: code buffer overflow\n");

    nb_tbs = 0;

    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
    }

    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
    page_flush_tb();

    code_gen_ptr = code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
}
#ifdef DEBUG_TB_CHECK

static void tb_invalidate_check(target_ulong address)
{
    TranslationBlock *tb;
    int i;
    address &= TARGET_PAGE_MASK;
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                  address >= tb->pc + tb->size)) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
                       " PC=%08lx size=%04x\n",
                       address, (long)tb->pc, tb->size);
            }
        }
    }
}

/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
    TranslationBlock *tb;
    int i, flags1, flags2;

    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            flags1 = page_get_flags(tb->pc);
            flags2 = page_get_flags(tb->pc + tb->size - 1);
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
                       (long)tb->pc, tb->size, flags1, flags2);
            }
        }
    }
}

#endif
/* invalidate one TB */
static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
                             int next_offset)
{
    TranslationBlock *tb1;
    for(;;) {
        tb1 = *ptb;
        if (tb1 == tb) {
            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
            break;
        }
        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
    }
}

static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    unsigned int n1;

    for(;;) {
        tb1 = *ptb;
        n1 = (long)tb1 & 3;
        tb1 = (TranslationBlock *)((long)tb1 & ~3);
        if (tb1 == tb) {
            *ptb = tb1->page_next[n1];
            break;
        }
        ptb = &tb1->page_next[n1];
    }
}
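
/* Added note: pointers in the per-page TB lists and jump lists carry a tag
 * in their low two bits, hence the masking with ~3 above.  Tag 0 or 1
 * records which of a TB's (up to) two pages the link belongs to; tag 2
 * marks the circular list head (see tb->jmp_first below). */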
static inline void tb_jmp_remove(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, **ptb;
    unsigned int n1;

    ptb = &tb->jmp_next[n];
    tb1 = *ptb;
    if (tb1) {
        /* find tb(n) in circular list */
        for(;;) {
            tb1 = *ptb;
            n1 = (long)tb1 & 3;
            tb1 = (TranslationBlock *)((long)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            if (n1 == 2) {
                ptb = &tb1->jmp_first;
            } else {
                ptb = &tb1->jmp_next[n1];
            }
        }
        /* now we can suppress tb(n) from the list */
        *ptb = tb->jmp_next[n];

        tb->jmp_next[n] = NULL;
    }
}

/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
}
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        n1 = (long)tb1 & 3;
        if (n1 == 2)
            break;
        tb1 = (TranslationBlock *)((long)tb1 & ~3);
        tb2 = tb1->jmp_next[n1];
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
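
/* Added worked example: set_bits(tab, 6, 4) marks bits 6..9.  Bits 6-7 set
 * mask 0xc0 in tab[0] and bits 8-9 set mask 0x03 in tab[1], taking the
 * cross-byte branch; a range contained in one byte takes the first branch
 * and masks both ends at once. */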
static void build_page_bitmap(PageDesc *p)
{
    int n, tb_start, tb_end;
    TranslationBlock *tb;

    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);

    tb = p->first_tb;
    while (tb != NULL) {
        n = (long)tb & 3;
        tb = (TranslationBlock *)((long)tb & ~3);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE)
                tb_end = TARGET_PAGE_SIZE;
        } else {
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
        tb = tb->page_next[n];
    }
}
TranslationBlock *tb_gen_code(CPUState *env,
                              target_ulong pc, target_ulong cs_base,
                              int flags, int cflags)
{
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    int code_gen_size;

    phys_pc = get_page_addr_code(env, pc);
    tb = tb_alloc(pc);
    if (!tb) {
        /* flush must be done */
        tb_flush(env);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info. */
        tb_invalidated_flag = 1;
    }
    tc_ptr = code_gen_ptr;
    tb->tc_ptr = tc_ptr;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    cpu_gen_code(env, tb, &code_gen_size);
    code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size +
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    tb_link_page(tb, phys_pc, phys_page2);
    return tb;
}
/* invalidate all TBs which intersect with the target physical page
   starting in range [start;end[. NOTE: start and end must refer to
   the same physical page. 'is_cpu_write_access' should be true if called
   from a real cpu write access: the virtual CPU will exit the current
   TB if code is modified inside this TB. */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        n = (long)tb & 3;
        tb = (TranslationBlock *)((long)tb & ~3);
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}
/* len must be <= 8 and start must be a multiple of len */
static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
    PageDesc *p;
    int offset, b;
#if 0
    if (1) {
        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
                  cpu_single_env->mem_io_vaddr, len,
                  cpu_single_env->eip,
                  cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
    }
#endif
    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (p->code_bitmap) {
        offset = start & ~TARGET_PAGE_MASK;
        b = p->code_bitmap[offset >> 3] >> (offset & 7);
        if (b & ((1 << len) - 1))
            goto do_invalidate;
    } else {
    do_invalidate:
        tb_invalidate_phys_page_range(start, start + len, 1);
    }
}
#if !defined(CONFIG_SOFTMMU)
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    unsigned long pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        n = (long)tb & 3;
        tb = (TranslationBlock *)((long)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
            /* If we are modifying the current TB, we must stop
               its execution. We could be more precise by checking
               that the modification is after the current PC, but it
               would require a specialized function to partially
               restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
#endif
/* add the tb in the target page and protect it if necessary */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    page_already_protected = p->first_tb != NULL;
#endif
    p->first_tb = (TranslationBlock *)((long)tb | n);
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}
/* add a new TB and link it to the physical page tables. phys_page2 is
   (-1) to indicate that only one page contains the TB. */
void tb_link_page(TranslationBlock *tb,
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
{
    unsigned int h;
    TranslationBlock **ptb;

    /* Grab the mmap lock to stop another thread invalidating this TB
       before we are done.  */
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1)
        tb_alloc_page(tb, 1, phys_page2);
    else
        tb->page_addr[1] = -1;

    tb->jmp_first = (TranslationBlock *)((long)tb | 2);
    tb->jmp_next[0] = NULL;
    tb->jmp_next[1] = NULL;

    /* init original jump addresses */
    if (tb->tb_next_offset[0] != 0xffff)
        tb_reset_jump(tb, 0);
    if (tb->tb_next_offset[1] != 0xffff)
        tb_reset_jump(tb, 1);

#ifdef DEBUG_TB_CHECK
    tb_page_check();
#endif
    mmap_unlock();
}
/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
   tb[1].tc_ptr. Return NULL if not found */
TranslationBlock *tb_find_pc(unsigned long tc_ptr)
{
    int m_min, m_max, m;
    unsigned long v;
    TranslationBlock *tb;

    if (nb_tbs <= 0)
        return NULL;
    if (tc_ptr < (unsigned long)code_gen_buffer ||
        tc_ptr >= (unsigned long)code_gen_ptr)
        return NULL;
    /* binary search (cf Knuth) */
    m_min = 0;
    m_max = nb_tbs - 1;
    while (m_min <= m_max) {
        m = (m_min + m_max) >> 1;
        tb = &tbs[m];
        v = (unsigned long)tb->tc_ptr;
        if (v == tc_ptr)
            return tb;
        else if (tc_ptr < v) {
            m_max = m - 1;
        } else {
            m_min = m + 1;
        }
    }
    return &tbs[m_max];
}
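
/* Added note: the binary search relies on tbs[] being ordered by tc_ptr,
 * which holds because translated code is appended linearly to
 * code_gen_buffer.  On a miss the loop exits with tbs[m_max] being the
 * last TB starting at or before tc_ptr, i.e. the one containing it. */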
static void tb_reset_jump_recursive(TranslationBlock *tb);

static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            n1 = (long)tb1 & 3;
            tb1 = (TranslationBlock *)((long)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (long)tb1 & 3;
            tb1 = (TranslationBlock *)((long)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}

static void tb_reset_jump_recursive(TranslationBlock *tb)
{
    tb_reset_jump_recursive2(tb, 0);
    tb_reset_jump_recursive2(tb, 1);
}
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    target_phys_addr_t addr;
    ram_addr_t ram_addr;
    MemoryRegionSection *section;

    addr = cpu_get_phys_page_debug(env, pc);
    section = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!(memory_region_is_ram(section->mr)
          || (section->mr->rom_device && section->mr->readable))) {
        return;
    }
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        + section_addr(section, addr);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif
/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);

    breakpoint_invalidate(env, pc);

    if (breakpoint)
        *breakpoint = bp;
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(env, breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}
/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *env, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (env->singlestep_enabled != enabled) {
        env->singlestep_enabled = enabled;
        if (kvm_enabled())
            kvm_update_guest_debug(env, 0);
        else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(env);
        }
    }
#endif
}
/* enable or disable low-level logging */
void cpu_set_log(int log_flags)
{
    loglevel = log_flags;
    if (loglevel && !logfile) {
        logfile = fopen(logfilename, log_append ? "a" : "w");
        if (!logfile) {
            perror(logfilename);
            _exit(1);
        }
#if !defined(CONFIG_SOFTMMU)
        /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
        {
            static char logfile_buf[4096];
            setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
        }
#elif defined(_WIN32)
        /* Win32 doesn't support line-buffering, so use unbuffered output. */
        setvbuf(logfile, NULL, _IONBF, 0);
#else
        setvbuf(logfile, NULL, _IOLBF, 0);
#endif
        log_append = 1;
    }
    if (!loglevel && logfile) {
        fclose(logfile);
        logfile = NULL;
    }
}

void cpu_set_log_filename(const char *filename)
{
    logfilename = strdup(filename);
    if (logfile) {
        fclose(logfile);
        logfile = NULL;
    }
    cpu_set_log(loglevel);
}
static void cpu_unlink_tb(CPUState *env)
{
    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
       problem and hope the cpu will stop of its own accord.  For userspace
       emulation this often isn't actually as bad as it sounds.  Often
       signals are used primarily to interrupt blocking syscalls.  */
    TranslationBlock *tb;
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;

    spin_lock(&interrupt_lock);
    tb = env->current_tb;
    /* if the cpu is currently executing code, we must unlink it and
       all the potentially executing TB */
    if (tb) {
        env->current_tb = NULL;
        tb_reset_jump_recursive(tb);
    }
    spin_unlock(&interrupt_lock);
}

#ifndef CONFIG_USER_ONLY
/* mask must never be zero, except for A20 change call */
static void tcg_handle_interrupt(CPUState *env, int mask)
{
    int old_mask;

    old_mask = env->interrupt_request;
    env->interrupt_request |= mask;

    /*
     * If called from iothread context, wake the target cpu in
     * case it's halted.
     */
    if (!qemu_cpu_is_self(env)) {
        qemu_cpu_kick(env);
        return;
    }

    if (use_icount) {
        env->icount_decr.u16.high = 0xffff;
        if (!can_do_io(env)
            && (mask & ~old_mask) != 0) {
            cpu_abort(env, "Raised interrupt while not in I/O function");
        }
    } else {
        cpu_unlink_tb(env);
    }
}

CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUState *env, int mask)
{
    env->interrupt_request |= mask;
    cpu_unlink_tb(env);
}
#endif /* CONFIG_USER_ONLY */

void cpu_reset_interrupt(CPUState *env, int mask)
{
    env->interrupt_request &= ~mask;
}

void cpu_exit(CPUState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}
const CPULogItem cpu_log_items[] = {
    { CPU_LOG_TB_OUT_ASM, "out_asm",
      "show generated host assembly code for each compiled TB" },
    { CPU_LOG_TB_IN_ASM, "in_asm",
      "show target assembly code for each compiled TB" },
    { CPU_LOG_TB_OP, "op",
      "show micro ops for each compiled TB" },
    { CPU_LOG_TB_OP_OPT, "op_opt",
      "show micro ops "
#ifdef TARGET_I386
      "before eflags optimization and "
#endif
      "after liveness analysis" },
    { CPU_LOG_INT, "int",
      "show interrupts/exceptions in short format" },
    { CPU_LOG_EXEC, "exec",
      "show trace before each executed TB (lots of logs)" },
    { CPU_LOG_TB_CPU, "cpu",
      "show CPU state before block translation" },
#ifdef TARGET_I386
    { CPU_LOG_PCALL, "pcall",
      "show protected mode far calls/returns/exceptions" },
    { CPU_LOG_RESET, "cpu_reset",
      "show CPU state before CPU resets" },
#endif
#ifdef DEBUG_IOPORT
    { CPU_LOG_IOPORT, "ioport",
      "show all i/o ports accesses" },
#endif
    { 0, NULL, NULL },
};
static int cmp1(const char *s1, int n, const char *s2)
{
    if (strlen(s2) != n)
        return 0;
    return memcmp(s1, s2, n) == 0;
}

/* takes a comma separated list of log masks. Return 0 if error. */
int cpu_str_to_log_mask(const char *str)
{
    const CPULogItem *item;
    int mask;
    const char *p, *p1;

    p = str;
    mask = 0;
    for(;;) {
        p1 = strchr(p, ',');
        if (!p1)
            p1 = p + strlen(p);
        if(cmp1(p,p1-p,"all")) {
            for(item = cpu_log_items; item->mask != 0; item++) {
                mask |= item->mask;
            }
        } else {
            for(item = cpu_log_items; item->mask != 0; item++) {
                if (cmp1(p, p1 - p, item->name))
                    goto found;
            }
            return 0;
        }
    found:
        mask |= item->mask;
        if (*p1 != ',')
            break;
        p = p1 + 1;
    }
    return mask;
}
void cpu_abort(CPUState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
#ifdef TARGET_I386
    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
#else
    cpu_dump_state(env, stderr, fprintf, 0);
#endif
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
#ifdef TARGET_I386
        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
#else
        log_cpu_state(env, 0);
#endif
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}
CPUState *cpu_copy(CPUState *env)
{
    CPUState *new_env = cpu_init(env->cpu_model_str);
    CPUState *next_cpu = new_env->next_cpu;
    int cpu_index = new_env->cpu_index;
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;
    CPUWatchpoint *wp;
#endif

    memcpy(new_env, env, sizeof(CPUState));

    /* Preserve chaining and index. */
    new_env->next_cpu = next_cpu;
    new_env->cpu_index = cpu_index;

    /* Clone all break/watchpoints.
       Note: Once we support ptrace with hw-debug register access, make sure
       BP_CPU break/watchpoints are handled correctly on clone. */
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#if defined(TARGET_HAS_ICE)
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
    }
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
                              wp->flags, NULL);
    }
#endif

    return new_env;
}
#if !defined(CONFIG_USER_ONLY)

static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
{
    unsigned int i;

    /* Discard jump cache entries for any tb which might potentially
       overlap the flushed page.  */
    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
    memset (&env->tb_jmp_cache[i], 0,
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));

    i = tb_jmp_cache_hash_page(addr);
    memset (&env->tb_jmp_cache[i], 0,
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
}

static CPUTLBEntry s_cputlb_empty_entry = {
    .addr_read  = -1,
    .addr_write = -1,
    .addr_code  = -1,
    .addend     = -1,
};
/* NOTE:
 * If flush_global is true (the usual case), flush all tlb entries.
 * If flush_global is false, flush (at least) all tlb entries not
 * marked global.
 *
 * Since QEMU doesn't currently implement a global/not-global flag
 * for tlb entries, at the moment tlb_flush() will also flush all
 * tlb entries in the flush_global == false case. This is OK because
 * CPU architectures generally permit an implementation to drop
 * entries from the TLB at any time, so flushing more entries than
 * required is only an efficiency issue, not a correctness issue.
 */
void tlb_flush(CPUState *env, int flush_global)
{
    int i;

#if defined(DEBUG_TLB)
    printf("tlb_flush:\n");
#endif
    /* must reset current TB so that interrupts cannot modify the
       links while we are modifying them */
    env->current_tb = NULL;

    for(i = 0; i < CPU_TLB_SIZE; i++) {
        int mmu_idx;
        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
            env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
        }
    }

    memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));

    env->tlb_flush_addr = -1;
    env->tlb_flush_mask = 0;
    tlb_flush_count++;
}
static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
{
    if (addr == (tlb_entry->addr_read &
                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
        addr == (tlb_entry->addr_write &
                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
        addr == (tlb_entry->addr_code &
                 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
        *tlb_entry = s_cputlb_empty_entry;
    }
}

void tlb_flush_page(CPUState *env, target_ulong addr)
{
    int i;
    int mmu_idx;

#if defined(DEBUG_TLB)
    printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
#endif
    /* Check if we need to flush due to large pages.  */
    if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
#if defined(DEBUG_TLB)
        printf("tlb_flush_page: forced full flush ("
               TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
               env->tlb_flush_addr, env->tlb_flush_mask);
#endif
        tlb_flush(env, 1);
        return;
    }
    /* must reset current TB so that interrupts cannot modify the
       links while we are modifying them */
    env->current_tb = NULL;

    addr &= TARGET_PAGE_MASK;
    i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
        tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);

    tlb_flush_jmp_cache(env, addr);
}
/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
static void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_reset_dirty(ram_addr,
                                    ram_addr + TARGET_PAGE_SIZE,
                                    CODE_DIRTY_FLAG);
}

/* update the TLB so that writes in physical page 'phys_addr' are no longer
   tested for self modifying code */
static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
                                    target_ulong vaddr)
{
    cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
}

static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
                                         unsigned long start, unsigned long length)
{
    unsigned long addr;
    if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
        addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
        if ((addr - start) < length) {
            tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
        }
    }
}
/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    CPUState *env;
    unsigned long length, start1;
    int i;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (unsigned long)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below.  */
    if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
            != (end - 1) - start) {
        abort();
    }

    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        int mmu_idx;
        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
            for(i = 0; i < CPU_TLB_SIZE; i++)
                tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
                                      start1, length);
        }
    }
}

int cpu_physical_memory_set_dirty_tracking(int enable)
{
    int ret = 0;
    in_migration = enable;
    return ret;
}
2092 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2094 ram_addr_t ram_addr;
2095 void *p;
2097 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2098 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2099 + tlb_entry->addend);
2100 ram_addr = qemu_ram_addr_from_host_nofail(p);
2101 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2102 tlb_entry->addr_write |= TLB_NOTDIRTY;
2107 /* update the TLB according to the current state of the dirty bits */
2108 void cpu_tlb_update_dirty(CPUState *env)
2110 int i;
2111 int mmu_idx;
2112 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2113 for(i = 0; i < CPU_TLB_SIZE; i++)
2114 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2118 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2120 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2121 tlb_entry->addr_write = vaddr;
2124 /* update the TLB corresponding to virtual page vaddr
2125 so that it is no longer dirty */
2126 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2128 int i;
2129 int mmu_idx;
2131 vaddr &= TARGET_PAGE_MASK;
2132 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2133 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2134 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2137 /* Our TLB does not support large pages, so remember the area covered by
2138 large pages and trigger a full TLB flush if these are invalidated. */
2139 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2140 target_ulong size)
2142 target_ulong mask = ~(size - 1);
2144 if (env->tlb_flush_addr == (target_ulong)-1) {
2145 env->tlb_flush_addr = vaddr & mask;
2146 env->tlb_flush_mask = mask;
2147 return;
2149 /* Extend the existing region to include the new page.
2150 This is a compromise between unnecessary flushes and the cost
2151 of maintaining a full variable size TLB. */
2152 mask &= env->tlb_flush_mask;
2153 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2154 mask <<= 1;
2156 env->tlb_flush_addr &= mask;
2157 env->tlb_flush_mask = mask;
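/* Worked example (illustrative, not part of the original source): assume
   2 MiB large pages.  Mapping the first one at 0x40000000 records
   tlb_flush_addr = 0x40000000 and tlb_flush_mask = ~(2 MiB - 1)
   (0xffe00000 on a 32-bit target).  Mapping a second 2 MiB page at
   0x40600000 widens the mask one bit at a time until both addresses agree
   under it, ending with tlb_flush_mask = 0xff800000, i.e. a single 8 MiB
   region starting at 0x40000000 that covers both pages.  Any
   tlb_flush_page() that hits inside that region now forces a full flush. */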
2160 static bool is_ram_rom(MemoryRegionSection *s)
2162 return memory_region_is_ram(s->mr);
2165 static bool is_romd(MemoryRegionSection *s)
2167 MemoryRegion *mr = s->mr;
2169 return mr->rom_device && mr->readable;
2172 static bool is_ram_rom_romd(MemoryRegionSection *s)
2174 return is_ram_rom(s) || is_romd(s);
2177 /* Add a new TLB entry. At most one entry for a given virtual address
2178 is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2179 supplied size is used only by tlb_flush_page. */
2180 void tlb_set_page(CPUState *env, target_ulong vaddr,
2181 target_phys_addr_t paddr, int prot,
2182 int mmu_idx, target_ulong size)
2184 MemoryRegionSection *section;
2185 unsigned int index;
2186 target_ulong address;
2187 target_ulong code_address;
2188 unsigned long addend;
2189 CPUTLBEntry *te;
2190 CPUWatchpoint *wp;
2191 target_phys_addr_t iotlb;
2193 assert(size >= TARGET_PAGE_SIZE);
2194 if (size != TARGET_PAGE_SIZE) {
2195 tlb_add_large_page(env, vaddr, size);
2197 section = phys_page_find(paddr >> TARGET_PAGE_BITS);
2198 #if defined(DEBUG_TLB)
2199 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2200 " prot=%x idx=%d\n",
2201 vaddr, paddr, prot, mmu_idx);
2202 #endif
2204 address = vaddr;
2205 if (!is_ram_rom_romd(section)) {
2206 /* IO memory case (romd handled later) */
2207 address |= TLB_MMIO;
2209 if (is_ram_rom_romd(section)) {
2210 addend = (unsigned long)memory_region_get_ram_ptr(section->mr)
2211 + section_addr(section, paddr);
2212 } else {
2213 addend = 0;
2215 if (is_ram_rom(section)) {
2216 /* Normal RAM. */
2217 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2218 + section_addr(section, paddr);
2219 if (!section->readonly)
2220 iotlb |= phys_section_notdirty;
2221 else
2222 iotlb |= phys_section_rom;
2223 } else {
2224 /* IO handlers are currently passed a physical address.
2225 It would be nice to pass an offset from the base address
2226 of that region. This would avoid having to special case RAM,
2227 and avoid full address decoding in every device.
2228 We can't use the high bits of pd for this because
2229 IO_MEM_ROMD uses these as a ram address. */
2230 iotlb = section - phys_sections;
2231 iotlb += section_addr(section, paddr);
2234 code_address = address;
2235 /* Make accesses to pages with watchpoints go via the
2236 watchpoint trap routines. */
2237 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2238 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2239 /* Avoid trapping reads of pages with a write breakpoint. */
2240 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2241 iotlb = phys_section_watch + paddr;
2242 address |= TLB_MMIO;
2243 break;
2248 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2249 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2250 te = &env->tlb_table[mmu_idx][index];
2251 te->addend = addend - vaddr;
2252 if (prot & PAGE_READ) {
2253 te->addr_read = address;
2254 } else {
2255 te->addr_read = -1;
2258 if (prot & PAGE_EXEC) {
2259 te->addr_code = code_address;
2260 } else {
2261 te->addr_code = -1;
2263 if (prot & PAGE_WRITE) {
2264 if ((memory_region_is_ram(section->mr) && section->readonly)
2265 || is_romd(section)) {
2266 /* Write access calls the I/O callback. */
2267 te->addr_write = address | TLB_MMIO;
2268 } else if (memory_region_is_ram(section->mr)
2269 && !cpu_physical_memory_is_dirty(
2270 section->mr->ram_addr
2271 + section_addr(section, paddr))) {
2272 te->addr_write = address | TLB_NOTDIRTY;
2273 } else {
2274 te->addr_write = address;
2276 } else {
2277 te->addr_write = -1;
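/* Illustrative note (not part of the original source): for the I/O case
   above, the value written to env->iotlb[mmu_idx][index] is
   (section - phys_sections) + section_addr(section, paddr) - vaddr, so
   once the guest virtual address is added back at lookup time the entry
   reads as "page-aligned offset | section index".  The softmmu slow path
   can then recover the MemoryRegion's io index from that section index
   via section_to_ioaddr(), defined later in this file, before dispatching
   through io_mem_read()/io_mem_write(). */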
2281 #else
2283 void tlb_flush(CPUState *env, int flush_global)
2287 void tlb_flush_page(CPUState *env, target_ulong addr)
2292 * Walks guest process memory "regions" one by one
2293 * and calls callback function 'fn' for each region.
2296 struct walk_memory_regions_data
2298 walk_memory_regions_fn fn;
2299 void *priv;
2300 unsigned long start;
2301 int prot;
2304 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2305 abi_ulong end, int new_prot)
2307 if (data->start != -1ul) {
2308 int rc = data->fn(data->priv, data->start, end, data->prot);
2309 if (rc != 0) {
2310 return rc;
2314 data->start = (new_prot ? end : -1ul);
2315 data->prot = new_prot;
2317 return 0;
2320 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2321 abi_ulong base, int level, void **lp)
2323 abi_ulong pa;
2324 int i, rc;
2326 if (*lp == NULL) {
2327 return walk_memory_regions_end(data, base, 0);
2330 if (level == 0) {
2331 PageDesc *pd = *lp;
2332 for (i = 0; i < L2_SIZE; ++i) {
2333 int prot = pd[i].flags;
2335 pa = base | (i << TARGET_PAGE_BITS);
2336 if (prot != data->prot) {
2337 rc = walk_memory_regions_end(data, pa, prot);
2338 if (rc != 0) {
2339 return rc;
2343 } else {
2344 void **pp = *lp;
2345 for (i = 0; i < L2_SIZE; ++i) {
2346 pa = base | ((abi_ulong)i <<
2347 (TARGET_PAGE_BITS + L2_BITS * level));
2348 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2349 if (rc != 0) {
2350 return rc;
2355 return 0;
2358 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2360 struct walk_memory_regions_data data;
2361 unsigned long i;
2363 data.fn = fn;
2364 data.priv = priv;
2365 data.start = -1ul;
2366 data.prot = 0;
2368 for (i = 0; i < V_L1_SIZE; i++) {
2369 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2370 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2371 if (rc != 0) {
2372 return rc;
2376 return walk_memory_regions_end(&data, 0, 0);
2379 static int dump_region(void *priv, abi_ulong start,
2380 abi_ulong end, unsigned long prot)
2382 FILE *f = (FILE *)priv;
2384 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2385 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2386 start, end, end - start,
2387 ((prot & PAGE_READ) ? 'r' : '-'),
2388 ((prot & PAGE_WRITE) ? 'w' : '-'),
2389 ((prot & PAGE_EXEC) ? 'x' : '-'));
2391 return (0);
2394 /* dump memory mappings */
2395 void page_dump(FILE *f)
2397 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2398 "start", "end", "size", "prot");
2399 walk_memory_regions(f, dump_region);
2402 int page_get_flags(target_ulong address)
2404 PageDesc *p;
2406 p = page_find(address >> TARGET_PAGE_BITS);
2407 if (!p)
2408 return 0;
2409 return p->flags;
2412 /* Modify the flags of a page and invalidate the code if necessary.
2413 The flag PAGE_WRITE_ORG is positioned automatically depending
2414 on PAGE_WRITE. The mmap_lock should already be held. */
2415 void page_set_flags(target_ulong start, target_ulong end, int flags)
2417 target_ulong addr, len;
2419 /* This function should never be called with addresses outside the
2420 guest address space. If this assert fires, it probably indicates
2421 a missing call to h2g_valid. */
2422 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2423 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2424 #endif
2425 assert(start < end);
2427 start = start & TARGET_PAGE_MASK;
2428 end = TARGET_PAGE_ALIGN(end);
2430 if (flags & PAGE_WRITE) {
2431 flags |= PAGE_WRITE_ORG;
2434 for (addr = start, len = end - start;
2435 len != 0;
2436 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2437 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2439 /* If the write protection bit is set, then we invalidate
2440 the code inside. */
2441 if (!(p->flags & PAGE_WRITE) &&
2442 (flags & PAGE_WRITE) &&
2443 p->first_tb) {
2444 tb_invalidate_phys_page(addr, 0, NULL);
2446 p->flags = flags;
2450 int page_check_range(target_ulong start, target_ulong len, int flags)
2452 PageDesc *p;
2453 target_ulong end;
2454 target_ulong addr;
2456 /* This function should never be called with addresses outside the
2457 guest address space. If this assert fires, it probably indicates
2458 a missing call to h2g_valid. */
2459 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2460 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2461 #endif
2463 if (len == 0) {
2464 return 0;
2466 if (start + len - 1 < start) {
2467 /* We've wrapped around. */
2468 return -1;
2471 end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2472 start = start & TARGET_PAGE_MASK;
2474 for (addr = start, len = end - start;
2475 len != 0;
2476 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2477 p = page_find(addr >> TARGET_PAGE_BITS);
2478 if (!p)
2479 return -1;
2480 if (!(p->flags & PAGE_VALID))
2481 return -1;
2483 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2484 return -1;
2485 if (flags & PAGE_WRITE) {
2486 if (!(p->flags & PAGE_WRITE_ORG))
2487 return -1;
2488 /* unprotect the page if it was put read-only because it
2489 contains translated code */
2490 if (!(p->flags & PAGE_WRITE)) {
2491 if (!page_unprotect(addr, 0, NULL))
2492 return -1;
2494 return 0;
2497 return 0;
2500 /* called from signal handler: invalidate the code and unprotect the
2501 page. Return TRUE if the fault was successfully handled. */
2502 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2504 unsigned int prot;
2505 PageDesc *p;
2506 target_ulong host_start, host_end, addr;
2508 /* Technically this isn't safe inside a signal handler. However we
2509 know this only ever happens in a synchronous SEGV handler, so in
2510 practice it seems to be ok. */
2511 mmap_lock();
2513 p = page_find(address >> TARGET_PAGE_BITS);
2514 if (!p) {
2515 mmap_unlock();
2516 return 0;
2519 /* if the page was really writable, then we change its
2520 protection back to writable */
2521 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2522 host_start = address & qemu_host_page_mask;
2523 host_end = host_start + qemu_host_page_size;
2525 prot = 0;
2526 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2527 p = page_find(addr >> TARGET_PAGE_BITS);
2528 p->flags |= PAGE_WRITE;
2529 prot |= p->flags;
2531 /* and since the content will be modified, we must invalidate
2532 the corresponding translated code. */
2533 tb_invalidate_phys_page(addr, pc, puc);
2534 #ifdef DEBUG_TB_CHECK
2535 tb_invalidate_check(addr);
2536 #endif
2538 mprotect((void *)g2h(host_start), qemu_host_page_size,
2539 prot & PAGE_BITS);
2541 mmap_unlock();
2542 return 1;
2544 mmap_unlock();
2545 return 0;
2548 static inline void tlb_set_dirty(CPUState *env,
2549 unsigned long addr, target_ulong vaddr)
2552 #endif /* defined(CONFIG_USER_ONLY) */
2554 #if !defined(CONFIG_USER_ONLY)
2556 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2557 typedef struct subpage_t {
2558 MemoryRegion iomem;
2559 target_phys_addr_t base;
2560 uint16_t sub_section[TARGET_PAGE_SIZE];
2561 } subpage_t;
2563 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2564 uint16_t section);
2565 static subpage_t *subpage_init(target_phys_addr_t base);
2566 static void destroy_page_desc(uint16_t section_index)
2568 MemoryRegionSection *section = &phys_sections[section_index];
2569 MemoryRegion *mr = section->mr;
2571 if (mr->subpage) {
2572 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2573 memory_region_destroy(&subpage->iomem);
2574 g_free(subpage);
2578 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2580 unsigned i;
2581 PhysPageEntry *p;
2583 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2584 return;
2587 p = phys_map_nodes[lp->ptr];
2588 for (i = 0; i < L2_SIZE; ++i) {
2589 if (!p[i].is_leaf) {
2590 destroy_l2_mapping(&p[i], level - 1);
2591 } else {
2592 destroy_page_desc(p[i].ptr);
2595 lp->is_leaf = 0;
2596 lp->ptr = PHYS_MAP_NODE_NIL;
2599 static void destroy_all_mappings(void)
2601 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2602 phys_map_nodes_reset();
2605 static uint16_t phys_section_add(MemoryRegionSection *section)
2607 if (phys_sections_nb == phys_sections_nb_alloc) {
2608 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2609 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2610 phys_sections_nb_alloc);
2612 phys_sections[phys_sections_nb] = *section;
2613 return phys_sections_nb++;
2616 static void phys_sections_clear(void)
2618 phys_sections_nb = 0;
2621 /* register physical memory.
2622 Sections that do not start or end on a page boundary are routed
2623 through a subpage (see register_subpage below); page-aligned,
2624 page-sized runs are entered directly into the physical page map
2625 via register_multipage. */
2629 static void register_subpage(MemoryRegionSection *section)
2631 subpage_t *subpage;
2632 target_phys_addr_t base = section->offset_within_address_space
2633 & TARGET_PAGE_MASK;
2634 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2635 MemoryRegionSection subsection = {
2636 .offset_within_address_space = base,
2637 .size = TARGET_PAGE_SIZE,
2639 target_phys_addr_t start, end;
2641 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2643 if (!(existing->mr->subpage)) {
2644 subpage = subpage_init(base);
2645 subsection.mr = &subpage->iomem;
2646 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2647 phys_section_add(&subsection));
2648 } else {
2649 subpage = container_of(existing->mr, subpage_t, iomem);
2651 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2652 end = start + section->size;
2653 subpage_register(subpage, start, end, phys_section_add(section));
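/* Worked example (illustrative, assuming 4 KiB target pages): if a
   0x100-byte MMIO region is mapped at 0x10000f00 while the rest of that
   page is plain RAM, the page cannot be described by a single section.
   register_subpage() above then creates (or reuses) a subpage_t for the
   page at 0x10000000, points the phys map at it, and records the new
   section's index in sub_section[0xf00..0xfff]; the remaining offsets
   keep whatever section was registered for them (initially
   phys_section_unassigned). */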
2657 static void register_multipage(MemoryRegionSection *section)
2659 target_phys_addr_t start_addr = section->offset_within_address_space;
2660 ram_addr_t size = section->size;
2661 target_phys_addr_t addr;
2662 uint16_t section_index = phys_section_add(section);
2664 assert(size);
2666 addr = start_addr;
2667 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2668 section_index);
2671 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2672 bool readonly)
2674 MemoryRegionSection now = *section, remain = *section;
2676 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2677 || (now.size < TARGET_PAGE_SIZE)) {
2678 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2679 - now.offset_within_address_space,
2680 now.size);
2681 register_subpage(&now);
2682 remain.size -= now.size;
2683 remain.offset_within_address_space += now.size;
2684 remain.offset_within_region += now.size;
2686 now = remain;
2687 now.size &= TARGET_PAGE_MASK;
2688 if (now.size) {
2689 register_multipage(&now);
2690 remain.size -= now.size;
2691 remain.offset_within_address_space += now.size;
2692 remain.offset_within_region += now.size;
2694 now = remain;
2695 if (now.size) {
2696 register_subpage(&now);
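/* Worked example (illustrative, assuming 4 KiB target pages): a section
   covering guest physical 0x10000800..0x10002bff is registered above in
   three steps: a head subpage for the 0x800 bytes up to the next page
   boundary, one full page (0x10001000..0x10001fff) via
   register_multipage(), and a tail subpage for the trailing 0xc00 bytes
   starting at 0x10002000. */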
2701 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2703 if (kvm_enabled())
2704 kvm_coalesce_mmio_region(addr, size);
2707 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2709 if (kvm_enabled())
2710 kvm_uncoalesce_mmio_region(addr, size);
2713 void qemu_flush_coalesced_mmio_buffer(void)
2715 if (kvm_enabled())
2716 kvm_flush_coalesced_mmio_buffer();
2719 #if defined(__linux__) && !defined(TARGET_S390X)
2721 #include <sys/vfs.h>
2723 #define HUGETLBFS_MAGIC 0x958458f6
2725 static long gethugepagesize(const char *path)
2727 struct statfs fs;
2728 int ret;
2730 do {
2731 ret = statfs(path, &fs);
2732 } while (ret != 0 && errno == EINTR);
2734 if (ret != 0) {
2735 perror(path);
2736 return 0;
2739 if (fs.f_type != HUGETLBFS_MAGIC)
2740 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2742 return fs.f_bsize;
2745 static void *file_ram_alloc(RAMBlock *block,
2746 ram_addr_t memory,
2747 const char *path)
2749 char *filename;
2750 void *area;
2751 int fd;
2752 #ifdef MAP_POPULATE
2753 int flags;
2754 #endif
2755 unsigned long hpagesize;
2757 hpagesize = gethugepagesize(path);
2758 if (!hpagesize) {
2759 return NULL;
2762 if (memory < hpagesize) {
2763 return NULL;
2766 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2767 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2768 return NULL;
2771 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2772 return NULL;
2775 fd = mkstemp(filename);
2776 if (fd < 0) {
2777 perror("unable to create backing store for hugepages");
2778 free(filename);
2779 return NULL;
2781 unlink(filename);
2782 free(filename);
2784 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2787 * ftruncate is not supported by hugetlbfs on older
2788 * hosts, so don't bother bailing out on errors.
2789 * If anything goes wrong with it under other filesystems,
2790 * mmap will fail.
2792 if (ftruncate(fd, memory))
2793 perror("ftruncate");
2795 #ifdef MAP_POPULATE
2796 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2797 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2798 * to sidestep this quirk.
2800 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2801 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2802 #else
2803 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2804 #endif
2805 if (area == MAP_FAILED) {
2806 perror("file_ram_alloc: can't mmap RAM pages");
2807 close(fd);
2808 return (NULL);
2810 block->fd = fd;
2811 return area;
2813 #endif
2815 static ram_addr_t find_ram_offset(ram_addr_t size)
2817 RAMBlock *block, *next_block;
2818 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2820 if (QLIST_EMPTY(&ram_list.blocks))
2821 return 0;
2823 QLIST_FOREACH(block, &ram_list.blocks, next) {
2824 ram_addr_t end, next = RAM_ADDR_MAX;
2826 end = block->offset + block->length;
2828 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2829 if (next_block->offset >= end) {
2830 next = MIN(next, next_block->offset);
2833 if (next - end >= size && next - end < mingap) {
2834 offset = end;
2835 mingap = next - end;
2839 if (offset == RAM_ADDR_MAX) {
2840 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2841 (uint64_t)size);
2842 abort();
2845 return offset;
2848 static ram_addr_t last_ram_offset(void)
2850 RAMBlock *block;
2851 ram_addr_t last = 0;
2853 QLIST_FOREACH(block, &ram_list.blocks, next)
2854 last = MAX(last, block->offset + block->length);
2856 return last;
2859 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2861 RAMBlock *new_block, *block;
2863 new_block = NULL;
2864 QLIST_FOREACH(block, &ram_list.blocks, next) {
2865 if (block->offset == addr) {
2866 new_block = block;
2867 break;
2870 assert(new_block);
2871 assert(!new_block->idstr[0]);
2873 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2874 char *id = dev->parent_bus->info->get_dev_path(dev);
2875 if (id) {
2876 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2877 g_free(id);
2880 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2882 QLIST_FOREACH(block, &ram_list.blocks, next) {
2883 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2884 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2885 new_block->idstr);
2886 abort();
2891 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2892 MemoryRegion *mr)
2894 RAMBlock *new_block;
2896 size = TARGET_PAGE_ALIGN(size);
2897 new_block = g_malloc0(sizeof(*new_block));
2899 new_block->mr = mr;
2900 new_block->offset = find_ram_offset(size);
2901 if (host) {
2902 new_block->host = host;
2903 new_block->flags |= RAM_PREALLOC_MASK;
2904 } else {
2905 if (mem_path) {
2906 #if defined (__linux__) && !defined(TARGET_S390X)
2907 new_block->host = file_ram_alloc(new_block, size, mem_path);
2908 if (!new_block->host) {
2909 new_block->host = qemu_vmalloc(size);
2910 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2912 #else
2913 fprintf(stderr, "-mem-path option unsupported\n");
2914 exit(1);
2915 #endif
2916 } else {
2917 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2918 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2919 a system-defined value, which is at least 256GB. Larger systems
2920 have larger values. We put the guest between the end of the data
2921 segment (system break) and this value. We use 32GB as a base to
2922 have enough room for the system break to grow. */
2923 new_block->host = mmap((void*)0x800000000, size,
2924 PROT_EXEC|PROT_READ|PROT_WRITE,
2925 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2926 if (new_block->host == MAP_FAILED) {
2927 fprintf(stderr, "Allocating RAM failed\n");
2928 abort();
2930 #else
2931 if (xen_enabled()) {
2932 xen_ram_alloc(new_block->offset, size, mr);
2933 } else {
2934 new_block->host = qemu_vmalloc(size);
2936 #endif
2937 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2940 new_block->length = size;
2942 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2944 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2945 last_ram_offset() >> TARGET_PAGE_BITS);
2946 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2947 0xff, size >> TARGET_PAGE_BITS);
2949 if (kvm_enabled())
2950 kvm_setup_guest_memory(new_block->host, size);
2952 return new_block->offset;
2955 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2957 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2960 void qemu_ram_free_from_ptr(ram_addr_t addr)
2962 RAMBlock *block;
2964 QLIST_FOREACH(block, &ram_list.blocks, next) {
2965 if (addr == block->offset) {
2966 QLIST_REMOVE(block, next);
2967 g_free(block);
2968 return;
2973 void qemu_ram_free(ram_addr_t addr)
2975 RAMBlock *block;
2977 QLIST_FOREACH(block, &ram_list.blocks, next) {
2978 if (addr == block->offset) {
2979 QLIST_REMOVE(block, next);
2980 if (block->flags & RAM_PREALLOC_MASK) {
2982 } else if (mem_path) {
2983 #if defined (__linux__) && !defined(TARGET_S390X)
2984 if (block->fd) {
2985 munmap(block->host, block->length);
2986 close(block->fd);
2987 } else {
2988 qemu_vfree(block->host);
2990 #else
2991 abort();
2992 #endif
2993 } else {
2994 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2995 munmap(block->host, block->length);
2996 #else
2997 if (xen_enabled()) {
2998 xen_invalidate_map_cache_entry(block->host);
2999 } else {
3000 qemu_vfree(block->host);
3002 #endif
3004 g_free(block);
3005 return;
3011 #ifndef _WIN32
3012 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3014 RAMBlock *block;
3015 ram_addr_t offset;
3016 int flags;
3017 void *area, *vaddr;
3019 QLIST_FOREACH(block, &ram_list.blocks, next) {
3020 offset = addr - block->offset;
3021 if (offset < block->length) {
3022 vaddr = block->host + offset;
3023 if (block->flags & RAM_PREALLOC_MASK) {
3025 } else {
3026 flags = MAP_FIXED;
3027 munmap(vaddr, length);
3028 if (mem_path) {
3029 #if defined(__linux__) && !defined(TARGET_S390X)
3030 if (block->fd) {
3031 #ifdef MAP_POPULATE
3032 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3033 MAP_PRIVATE;
3034 #else
3035 flags |= MAP_PRIVATE;
3036 #endif
3037 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3038 flags, block->fd, offset);
3039 } else {
3040 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3041 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3042 flags, -1, 0);
3044 #else
3045 abort();
3046 #endif
3047 } else {
3048 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3049 flags |= MAP_SHARED | MAP_ANONYMOUS;
3050 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3051 flags, -1, 0);
3052 #else
3053 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3054 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3055 flags, -1, 0);
3056 #endif
3058 if (area != vaddr) {
3059 fprintf(stderr, "Could not remap addr: "
3060 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3061 length, addr);
3062 exit(1);
3064 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3066 return;
3070 #endif /* !_WIN32 */
3072 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3073 With the exception of the softmmu code in this file, this should
3074 only be used for local memory (e.g. video ram) that the device owns,
3075 and knows it isn't going to access beyond the end of the block.
3077 It should not be used for general purpose DMA.
3078 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3080 void *qemu_get_ram_ptr(ram_addr_t addr)
3082 RAMBlock *block;
3084 QLIST_FOREACH(block, &ram_list.blocks, next) {
3085 if (addr - block->offset < block->length) {
3086 /* Move this entry to the start of the list. */
3087 if (block != QLIST_FIRST(&ram_list.blocks)) {
3088 QLIST_REMOVE(block, next);
3089 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3091 if (xen_enabled()) {
3092 /* We need to check if the requested address is in RAM
3093 * because we don't want to map the entire memory in QEMU.
3094 * In that case just map until the end of the page.
3096 if (block->offset == 0) {
3097 return xen_map_cache(addr, 0, 0);
3098 } else if (block->host == NULL) {
3099 block->host =
3100 xen_map_cache(block->offset, block->length, 1);
3103 return block->host + (addr - block->offset);
3107 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3108 abort();
3110 return NULL;
3113 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3114 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3116 void *qemu_safe_ram_ptr(ram_addr_t addr)
3118 RAMBlock *block;
3120 QLIST_FOREACH(block, &ram_list.blocks, next) {
3121 if (addr - block->offset < block->length) {
3122 if (xen_enabled()) {
3123 /* We need to check if the requested address is in RAM
3124 * because we don't want to map the entire memory in QEMU.
3125 * In that case just map until the end of the page.
3127 if (block->offset == 0) {
3128 return xen_map_cache(addr, 0, 0);
3129 } else if (block->host == NULL) {
3130 block->host =
3131 xen_map_cache(block->offset, block->length, 1);
3134 return block->host + (addr - block->offset);
3138 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3139 abort();
3141 return NULL;
3144 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3145 * but takes a size argument */
3146 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3148 if (*size == 0) {
3149 return NULL;
3151 if (xen_enabled()) {
3152 return xen_map_cache(addr, *size, 1);
3153 } else {
3154 RAMBlock *block;
3156 QLIST_FOREACH(block, &ram_list.blocks, next) {
3157 if (addr - block->offset < block->length) {
3158 if (addr - block->offset + *size > block->length)
3159 *size = block->length - addr + block->offset;
3160 return block->host + (addr - block->offset);
3164 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3165 abort();
3169 void qemu_put_ram_ptr(void *addr)
3171 trace_qemu_put_ram_ptr(addr);
3174 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3176 RAMBlock *block;
3177 uint8_t *host = ptr;
3179 if (xen_enabled()) {
3180 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3181 return 0;
3184 QLIST_FOREACH(block, &ram_list.blocks, next) {
3185 /* This case occurs when the block is not mapped. */
3186 if (block->host == NULL) {
3187 continue;
3189 if (host - block->host < block->length) {
3190 *ram_addr = block->offset + (host - block->host);
3191 return 0;
3195 return -1;
3198 /* Some of the softmmu routines need to translate from a host pointer
3199 (typically a TLB entry) back to a ram offset. */
3200 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3202 ram_addr_t ram_addr;
3204 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3205 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3206 abort();
3208 return ram_addr;
3211 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3212 unsigned size)
3214 #ifdef DEBUG_UNASSIGNED
3215 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3216 #endif
3217 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3218 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3219 #endif
3220 return 0;
3223 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3224 uint64_t val, unsigned size)
3226 #ifdef DEBUG_UNASSIGNED
3227 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3228 #endif
3229 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3230 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3231 #endif
3234 static const MemoryRegionOps unassigned_mem_ops = {
3235 .read = unassigned_mem_read,
3236 .write = unassigned_mem_write,
3237 .endianness = DEVICE_NATIVE_ENDIAN,
3240 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3241 unsigned size)
3243 abort();
3246 static void error_mem_write(void *opaque, target_phys_addr_t addr,
3247 uint64_t value, unsigned size)
3249 abort();
3252 static const MemoryRegionOps error_mem_ops = {
3253 .read = error_mem_read,
3254 .write = error_mem_write,
3255 .endianness = DEVICE_NATIVE_ENDIAN,
3258 static const MemoryRegionOps rom_mem_ops = {
3259 .read = error_mem_read,
3260 .write = unassigned_mem_write,
3261 .endianness = DEVICE_NATIVE_ENDIAN,
3264 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3265 uint64_t val, unsigned size)
3267 int dirty_flags;
3268 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3269 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3270 #if !defined(CONFIG_USER_ONLY)
3271 tb_invalidate_phys_page_fast(ram_addr, size);
3272 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3273 #endif
3275 switch (size) {
3276 case 1:
3277 stb_p(qemu_get_ram_ptr(ram_addr), val);
3278 break;
3279 case 2:
3280 stw_p(qemu_get_ram_ptr(ram_addr), val);
3281 break;
3282 case 4:
3283 stl_p(qemu_get_ram_ptr(ram_addr), val);
3284 break;
3285 default:
3286 abort();
3288 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3289 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3290 /* we remove the notdirty callback only if the code has been
3291 flushed */
3292 if (dirty_flags == 0xff)
3293 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3296 static const MemoryRegionOps notdirty_mem_ops = {
3297 .read = error_mem_read,
3298 .write = notdirty_mem_write,
3299 .endianness = DEVICE_NATIVE_ENDIAN,
3302 /* Generate a debug exception if a watchpoint has been hit. */
3303 static void check_watchpoint(int offset, int len_mask, int flags)
3305 CPUState *env = cpu_single_env;
3306 target_ulong pc, cs_base;
3307 TranslationBlock *tb;
3308 target_ulong vaddr;
3309 CPUWatchpoint *wp;
3310 int cpu_flags;
3312 if (env->watchpoint_hit) {
3313 /* We re-entered the check after replacing the TB. Now raise
3314 * the debug interrupt so that it will trigger after the
3315 * current instruction. */
3316 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3317 return;
3319 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3320 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3321 if ((vaddr == (wp->vaddr & len_mask) ||
3322 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3323 wp->flags |= BP_WATCHPOINT_HIT;
3324 if (!env->watchpoint_hit) {
3325 env->watchpoint_hit = wp;
3326 tb = tb_find_pc(env->mem_io_pc);
3327 if (!tb) {
3328 cpu_abort(env, "check_watchpoint: could not find TB for "
3329 "pc=%p", (void *)env->mem_io_pc);
3331 cpu_restore_state(tb, env, env->mem_io_pc);
3332 tb_phys_invalidate(tb, -1);
3333 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3334 env->exception_index = EXCP_DEBUG;
3335 cpu_loop_exit(env);
3336 } else {
3337 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3338 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3339 cpu_resume_from_signal(env, NULL);
3342 } else {
3343 wp->flags &= ~BP_WATCHPOINT_HIT;
3348 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3349 so these check for a hit then pass through to the normal out-of-line
3350 phys routines. */
3351 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3352 unsigned size)
3354 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3355 switch (size) {
3356 case 1: return ldub_phys(addr);
3357 case 2: return lduw_phys(addr);
3358 case 4: return ldl_phys(addr);
3359 default: abort();
3363 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3364 uint64_t val, unsigned size)
3366 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3367 switch (size) {
3368 case 1:
3369 stb_phys(addr, val);
3370 break;
3371 case 2:
3372 stw_phys(addr, val);
3373 break;
3374 case 4:
3375 stl_phys(addr, val);
3376 break;
3377 default: abort();
3381 static const MemoryRegionOps watch_mem_ops = {
3382 .read = watch_mem_read,
3383 .write = watch_mem_write,
3384 .endianness = DEVICE_NATIVE_ENDIAN,
3387 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3388 unsigned len)
3390 subpage_t *mmio = opaque;
3391 unsigned int idx = SUBPAGE_IDX(addr);
3392 MemoryRegionSection *section;
3393 #if defined(DEBUG_SUBPAGE)
3394 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3395 mmio, len, addr, idx);
3396 #endif
3398 section = &phys_sections[mmio->sub_section[idx]];
3399 addr += mmio->base;
3400 addr -= section->offset_within_address_space;
3401 addr += section->offset_within_region;
3402 return io_mem_read(section->mr->ram_addr, addr, len);
3405 static void subpage_write(void *opaque, target_phys_addr_t addr,
3406 uint64_t value, unsigned len)
3408 subpage_t *mmio = opaque;
3409 unsigned int idx = SUBPAGE_IDX(addr);
3410 MemoryRegionSection *section;
3411 #if defined(DEBUG_SUBPAGE)
3412 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3413 " idx %d value %"PRIx64"\n",
3414 __func__, mmio, len, addr, idx, value);
3415 #endif
3417 section = &phys_sections[mmio->sub_section[idx]];
3418 addr += mmio->base;
3419 addr -= section->offset_within_address_space;
3420 addr += section->offset_within_region;
3421 io_mem_write(section->mr->ram_addr, addr, value, len);
3424 static const MemoryRegionOps subpage_ops = {
3425 .read = subpage_read,
3426 .write = subpage_write,
3427 .endianness = DEVICE_NATIVE_ENDIAN,
3430 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3431 unsigned size)
3433 ram_addr_t raddr = addr;
3434 void *ptr = qemu_get_ram_ptr(raddr);
3435 switch (size) {
3436 case 1: return ldub_p(ptr);
3437 case 2: return lduw_p(ptr);
3438 case 4: return ldl_p(ptr);
3439 default: abort();
3443 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3444 uint64_t value, unsigned size)
3446 ram_addr_t raddr = addr;
3447 void *ptr = qemu_get_ram_ptr(raddr);
3448 switch (size) {
3449 case 1: return stb_p(ptr, value);
3450 case 2: return stw_p(ptr, value);
3451 case 4: return stl_p(ptr, value);
3452 default: abort();
3456 static const MemoryRegionOps subpage_ram_ops = {
3457 .read = subpage_ram_read,
3458 .write = subpage_ram_write,
3459 .endianness = DEVICE_NATIVE_ENDIAN,
3462 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3463 uint16_t section)
3465 int idx, eidx;
3467 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3468 return -1;
3469 idx = SUBPAGE_IDX(start);
3470 eidx = SUBPAGE_IDX(end);
3471 #if defined(DEBUG_SUBPAGE)
3472 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
3473 __func__, mmio, start, end, idx, eidx, section);
3474 #endif
3475 if (memory_region_is_ram(phys_sections[section].mr)) {
3476 MemoryRegionSection new_section = phys_sections[section];
3477 new_section.mr = &io_mem_subpage_ram;
3478 section = phys_section_add(&new_section);
3480 for (; idx <= eidx; idx++) {
3481 mmio->sub_section[idx] = section;
3484 return 0;
3487 static subpage_t *subpage_init(target_phys_addr_t base)
3489 subpage_t *mmio;
3491 mmio = g_malloc0(sizeof(subpage_t));
3493 mmio->base = base;
3494 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3495 "subpage", TARGET_PAGE_SIZE);
3496 mmio->iomem.subpage = true;
3497 #if defined(DEBUG_SUBPAGE)
3498 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
3499 mmio, base, TARGET_PAGE_SIZE);
3500 #endif
3501 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3503 return mmio;
3506 static int get_free_io_mem_idx(void)
3508 int i;
3510 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3511 if (!io_mem_used[i]) {
3512 io_mem_used[i] = 1;
3513 return i;
3515 fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3516 return -1;
3519 /* Register a MemoryRegion as an io zone. If io_index is non-zero,
3520 the corresponding io zone is modified; if it is zero, a new io zone
3521 is allocated. The return value can be used with
3522 cpu_register_physical_memory(). (-1) is returned on error. */
3526 static int cpu_register_io_memory_fixed(int io_index, MemoryRegion *mr)
3528 if (io_index <= 0) {
3529 io_index = get_free_io_mem_idx();
3530 if (io_index == -1)
3531 return io_index;
3532 } else {
3533 if (io_index >= IO_MEM_NB_ENTRIES)
3534 return -1;
3537 io_mem_region[io_index] = mr;
3539 return io_index;
3542 int cpu_register_io_memory(MemoryRegion *mr)
3544 return cpu_register_io_memory_fixed(0, mr);
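/* Usage sketch (illustrative only; the "my_mmio_*" names are hypothetical
   device-model code, not part of the original file): */
#if 0   /* example only */
static uint64_t my_mmio_read(void *opaque, target_phys_addr_t addr,
                             unsigned size)
{
    return 0;   /* hypothetical device: reads as zero */
}

static void my_mmio_write(void *opaque, target_phys_addr_t addr,
                          uint64_t val, unsigned size)
{
    /* hypothetical device: writes are ignored */
}

static const MemoryRegionOps my_mmio_ops = {
    .read = my_mmio_read,
    .write = my_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static MemoryRegion my_mmio_region;

static void example_register_mmio(void)
{
    memory_region_init_io(&my_mmio_region, &my_mmio_ops, NULL,
                          "example-mmio", TARGET_PAGE_SIZE);
    int io_index = cpu_register_io_memory(&my_mmio_region);
    /* io_index can later be handed to io_mem_read()/io_mem_write(),
       or released with cpu_unregister_io_memory(io_index). */
    (void)io_index;
}
#endif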
3547 void cpu_unregister_io_memory(int io_index)
3549 io_mem_region[io_index] = NULL;
3550 io_mem_used[io_index] = 0;
3553 static uint16_t dummy_section(MemoryRegion *mr)
3555 MemoryRegionSection section = {
3556 .mr = mr,
3557 .offset_within_address_space = 0,
3558 .offset_within_region = 0,
3559 .size = UINT64_MAX,
3562 return phys_section_add(&section);
3565 target_phys_addr_t section_to_ioaddr(target_phys_addr_t section_io_addr)
3567 MemoryRegionSection *section;
3569 section = &phys_sections[section_io_addr & ~TARGET_PAGE_MASK];
3570 return (section_io_addr & TARGET_PAGE_MASK)
3571 | (section->mr->ram_addr & ~TARGET_PAGE_MASK);
3574 static void io_mem_init(void)
3576 int i;
3578 /* Must be first: */
3579 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3580 assert(io_mem_ram.ram_addr == 0);
3581 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3582 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3583 "unassigned", UINT64_MAX);
3584 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3585 "notdirty", UINT64_MAX);
3586 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3587 "subpage-ram", UINT64_MAX);
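    /* mark the io_mem slots taken by the fixed regions above as used */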
3588 for (i=0; i<5; i++)
3589 io_mem_used[i] = 1;
3591 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3592 "watch", UINT64_MAX);
3595 static void core_begin(MemoryListener *listener)
3597 destroy_all_mappings();
3598 phys_sections_clear();
3599 phys_map.ptr = PHYS_MAP_NODE_NIL;
3600 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3601 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3602 phys_section_rom = dummy_section(&io_mem_rom);
3603 phys_section_watch = dummy_section(&io_mem_watch);
3606 static void core_commit(MemoryListener *listener)
3608 CPUState *env;
3610 /* since each CPU stores ram addresses in its TLB cache, we must
3611 reset the modified entries */
3612 /* XXX: slow ! */
3613 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3614 tlb_flush(env, 1);
3618 static void core_region_add(MemoryListener *listener,
3619 MemoryRegionSection *section)
3621 cpu_register_physical_memory_log(section, section->readonly);
3624 static void core_region_del(MemoryListener *listener,
3625 MemoryRegionSection *section)
3629 static void core_region_nop(MemoryListener *listener,
3630 MemoryRegionSection *section)
3632 cpu_register_physical_memory_log(section, section->readonly);
3635 static void core_log_start(MemoryListener *listener,
3636 MemoryRegionSection *section)
3640 static void core_log_stop(MemoryListener *listener,
3641 MemoryRegionSection *section)
3645 static void core_log_sync(MemoryListener *listener,
3646 MemoryRegionSection *section)
3650 static void core_log_global_start(MemoryListener *listener)
3652 cpu_physical_memory_set_dirty_tracking(1);
3655 static void core_log_global_stop(MemoryListener *listener)
3657 cpu_physical_memory_set_dirty_tracking(0);
3660 static void core_eventfd_add(MemoryListener *listener,
3661 MemoryRegionSection *section,
3662 bool match_data, uint64_t data, int fd)
3666 static void core_eventfd_del(MemoryListener *listener,
3667 MemoryRegionSection *section,
3668 bool match_data, uint64_t data, int fd)
3672 static void io_begin(MemoryListener *listener)
3676 static void io_commit(MemoryListener *listener)
3680 static void io_region_add(MemoryListener *listener,
3681 MemoryRegionSection *section)
3683 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3685 mrio->mr = section->mr;
3686 mrio->offset = section->offset_within_region;
3687 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3688 section->offset_within_address_space, section->size);
3689 ioport_register(&mrio->iorange);
3692 static void io_region_del(MemoryListener *listener,
3693 MemoryRegionSection *section)
3695 isa_unassign_ioport(section->offset_within_address_space, section->size);
3698 static void io_region_nop(MemoryListener *listener,
3699 MemoryRegionSection *section)
3703 static void io_log_start(MemoryListener *listener,
3704 MemoryRegionSection *section)
3708 static void io_log_stop(MemoryListener *listener,
3709 MemoryRegionSection *section)
3713 static void io_log_sync(MemoryListener *listener,
3714 MemoryRegionSection *section)
3718 static void io_log_global_start(MemoryListener *listener)
3722 static void io_log_global_stop(MemoryListener *listener)
3726 static void io_eventfd_add(MemoryListener *listener,
3727 MemoryRegionSection *section,
3728 bool match_data, uint64_t data, int fd)
3732 static void io_eventfd_del(MemoryListener *listener,
3733 MemoryRegionSection *section,
3734 bool match_data, uint64_t data, int fd)
3738 static MemoryListener core_memory_listener = {
3739 .begin = core_begin,
3740 .commit = core_commit,
3741 .region_add = core_region_add,
3742 .region_del = core_region_del,
3743 .region_nop = core_region_nop,
3744 .log_start = core_log_start,
3745 .log_stop = core_log_stop,
3746 .log_sync = core_log_sync,
3747 .log_global_start = core_log_global_start,
3748 .log_global_stop = core_log_global_stop,
3749 .eventfd_add = core_eventfd_add,
3750 .eventfd_del = core_eventfd_del,
3751 .priority = 0,
3754 static MemoryListener io_memory_listener = {
3755 .begin = io_begin,
3756 .commit = io_commit,
3757 .region_add = io_region_add,
3758 .region_del = io_region_del,
3759 .region_nop = io_region_nop,
3760 .log_start = io_log_start,
3761 .log_stop = io_log_stop,
3762 .log_sync = io_log_sync,
3763 .log_global_start = io_log_global_start,
3764 .log_global_stop = io_log_global_stop,
3765 .eventfd_add = io_eventfd_add,
3766 .eventfd_del = io_eventfd_del,
3767 .priority = 0,
3770 static void memory_map_init(void)
3772 system_memory = g_malloc(sizeof(*system_memory));
3773 memory_region_init(system_memory, "system", INT64_MAX);
3774 set_system_memory_map(system_memory);
3776 system_io = g_malloc(sizeof(*system_io));
3777 memory_region_init(system_io, "io", 65536);
3778 set_system_io_map(system_io);
3780 memory_listener_register(&core_memory_listener, system_memory);
3781 memory_listener_register(&io_memory_listener, system_io);
3784 MemoryRegion *get_system_memory(void)
3786 return system_memory;
3789 MemoryRegion *get_system_io(void)
3791 return system_io;
3794 #endif /* !defined(CONFIG_USER_ONLY) */
3796 /* physical memory access (slow version, mainly for debug) */
3797 #if defined(CONFIG_USER_ONLY)
3798 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3799 uint8_t *buf, int len, int is_write)
3801 int l, flags;
3802 target_ulong page;
3803 void * p;
3805 while (len > 0) {
3806 page = addr & TARGET_PAGE_MASK;
3807 l = (page + TARGET_PAGE_SIZE) - addr;
3808 if (l > len)
3809 l = len;
3810 flags = page_get_flags(page);
3811 if (!(flags & PAGE_VALID))
3812 return -1;
3813 if (is_write) {
3814 if (!(flags & PAGE_WRITE))
3815 return -1;
3816 /* XXX: this code should not depend on lock_user */
3817 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3818 return -1;
3819 memcpy(p, buf, l);
3820 unlock_user(p, addr, l);
3821 } else {
3822 if (!(flags & PAGE_READ))
3823 return -1;
3824 /* XXX: this code should not depend on lock_user */
3825 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3826 return -1;
3827 memcpy(buf, p, l);
3828 unlock_user(p, addr, 0);
3830 len -= l;
3831 buf += l;
3832 addr += l;
3834 return 0;
3837 #else
3838 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3839 int len, int is_write)
3841 int l, io_index;
3842 uint8_t *ptr;
3843 uint32_t val;
3844 target_phys_addr_t page;
3845 MemoryRegionSection *section;
3847 while (len > 0) {
3848 page = addr & TARGET_PAGE_MASK;
3849 l = (page + TARGET_PAGE_SIZE) - addr;
3850 if (l > len)
3851 l = len;
3852 section = phys_page_find(page >> TARGET_PAGE_BITS);
3854 if (is_write) {
3855 if (!memory_region_is_ram(section->mr)) {
3856 target_phys_addr_t addr1;
3857 io_index = memory_region_get_ram_addr(section->mr)
3858 & (IO_MEM_NB_ENTRIES - 1);
3859 addr1 = section_addr(section, addr);
3860 /* XXX: could force cpu_single_env to NULL to avoid
3861 potential bugs */
3862 if (l >= 4 && ((addr1 & 3) == 0)) {
3863 /* 32 bit write access */
3864 val = ldl_p(buf);
3865 io_mem_write(io_index, addr1, val, 4);
3866 l = 4;
3867 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3868 /* 16 bit write access */
3869 val = lduw_p(buf);
3870 io_mem_write(io_index, addr1, val, 2);
3871 l = 2;
3872 } else {
3873 /* 8 bit write access */
3874 val = ldub_p(buf);
3875 io_mem_write(io_index, addr1, val, 1);
3876 l = 1;
3878 } else if (!section->readonly) {
3879 ram_addr_t addr1;
3880 addr1 = memory_region_get_ram_addr(section->mr)
3881 + section_addr(section, addr);
3882 /* RAM case */
3883 ptr = qemu_get_ram_ptr(addr1);
3884 memcpy(ptr, buf, l);
3885 if (!cpu_physical_memory_is_dirty(addr1)) {
3886 /* invalidate code */
3887 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3888 /* set dirty bit */
3889 cpu_physical_memory_set_dirty_flags(
3890 addr1, (0xff & ~CODE_DIRTY_FLAG));
3892 qemu_put_ram_ptr(ptr);
3894 } else {
3895 if (!is_ram_rom_romd(section)) {
3896 target_phys_addr_t addr1;
3897 /* I/O case */
3898 io_index = memory_region_get_ram_addr(section->mr)
3899 & (IO_MEM_NB_ENTRIES - 1);
3900 addr1 = section_addr(section, addr);
3901 if (l >= 4 && ((addr1 & 3) == 0)) {
3902 /* 32 bit read access */
3903 val = io_mem_read(io_index, addr1, 4);
3904 stl_p(buf, val);
3905 l = 4;
3906 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3907 /* 16 bit read access */
3908 val = io_mem_read(io_index, addr1, 2);
3909 stw_p(buf, val);
3910 l = 2;
3911 } else {
3912 /* 8 bit read access */
3913 val = io_mem_read(io_index, addr1, 1);
3914 stb_p(buf, val);
3915 l = 1;
3917 } else {
3918 /* RAM case */
3919 ptr = qemu_get_ram_ptr(section->mr->ram_addr)
3920 + section_addr(section, addr);
3921 memcpy(buf, ptr, l);
3922 qemu_put_ram_ptr(ptr);
3925 len -= l;
3926 buf += l;
3927 addr += l;
3931 /* used for ROM loading : can write in RAM and ROM */
3932 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3933 const uint8_t *buf, int len)
3935 int l;
3936 uint8_t *ptr;
3937 target_phys_addr_t page;
3938 MemoryRegionSection *section;
3940 while (len > 0) {
3941 page = addr & TARGET_PAGE_MASK;
3942 l = (page + TARGET_PAGE_SIZE) - addr;
3943 if (l > len)
3944 l = len;
3945 section = phys_page_find(page >> TARGET_PAGE_BITS);
3947 if (!is_ram_rom_romd(section)) {
3948 /* do nothing */
3949 } else {
3950 unsigned long addr1;
3951 addr1 = memory_region_get_ram_addr(section->mr)
3952 + section_addr(section, addr);
3953 /* ROM/RAM case */
3954 ptr = qemu_get_ram_ptr(addr1);
3955 memcpy(ptr, buf, l);
3956 qemu_put_ram_ptr(ptr);
3958 len -= l;
3959 buf += l;
3960 addr += l;
3964 typedef struct {
3965 void *buffer;
3966 target_phys_addr_t addr;
3967 target_phys_addr_t len;
3968 } BounceBuffer;
3970 static BounceBuffer bounce;
3972 typedef struct MapClient {
3973 void *opaque;
3974 void (*callback)(void *opaque);
3975 QLIST_ENTRY(MapClient) link;
3976 } MapClient;
3978 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3979 = QLIST_HEAD_INITIALIZER(map_client_list);
3981 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3983 MapClient *client = g_malloc(sizeof(*client));
3985 client->opaque = opaque;
3986 client->callback = callback;
3987 QLIST_INSERT_HEAD(&map_client_list, client, link);
3988 return client;
3991 void cpu_unregister_map_client(void *_client)
3993 MapClient *client = (MapClient *)_client;
3995 QLIST_REMOVE(client, link);
3996 g_free(client);
3999 static void cpu_notify_map_clients(void)
4001 MapClient *client;
4003 while (!QLIST_EMPTY(&map_client_list)) {
4004 client = QLIST_FIRST(&map_client_list);
4005 client->callback(client->opaque);
4006 cpu_unregister_map_client(client);
4010 /* Map a physical memory region into a host virtual address.
4011 * May map a subset of the requested range, given by and returned in *plen.
4012 * May return NULL if resources needed to perform the mapping are exhausted.
4013 * Use only for reads OR writes - not for read-modify-write operations.
4014 * Use cpu_register_map_client() to know when retrying the map operation is
4015 * likely to succeed.
4017 void *cpu_physical_memory_map(target_phys_addr_t addr,
4018 target_phys_addr_t *plen,
4019 int is_write)
4021 target_phys_addr_t len = *plen;
4022 target_phys_addr_t todo = 0;
4023 int l;
4024 target_phys_addr_t page;
4025 MemoryRegionSection *section;
4026 ram_addr_t raddr = RAM_ADDR_MAX;
4027 ram_addr_t rlen;
4028 void *ret;
4030 while (len > 0) {
4031 page = addr & TARGET_PAGE_MASK;
4032 l = (page + TARGET_PAGE_SIZE) - addr;
4033 if (l > len)
4034 l = len;
4035 section = phys_page_find(page >> TARGET_PAGE_BITS);
4037 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
4038 if (todo || bounce.buffer) {
4039 break;
4041 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4042 bounce.addr = addr;
4043 bounce.len = l;
4044 if (!is_write) {
4045 cpu_physical_memory_read(addr, bounce.buffer, l);
4048 *plen = l;
4049 return bounce.buffer;
4051 if (!todo) {
4052 raddr = memory_region_get_ram_addr(section->mr)
4053 + section_addr(section, addr);
4056 len -= l;
4057 addr += l;
4058 todo += l;
4060 rlen = todo;
4061 ret = qemu_ram_ptr_length(raddr, &rlen);
4062 *plen = rlen;
4063 return ret;
4066 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4067 * Will also mark the memory as dirty if is_write == 1. access_len gives
4068 * the amount of memory that was actually read or written by the caller.
4070 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4071 int is_write, target_phys_addr_t access_len)
4073 if (buffer != bounce.buffer) {
4074 if (is_write) {
4075 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4076 while (access_len) {
4077 unsigned l;
4078 l = TARGET_PAGE_SIZE;
4079 if (l > access_len)
4080 l = access_len;
4081 if (!cpu_physical_memory_is_dirty(addr1)) {
4082 /* invalidate code */
4083 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4084 /* set dirty bit */
4085 cpu_physical_memory_set_dirty_flags(
4086 addr1, (0xff & ~CODE_DIRTY_FLAG));
4088 addr1 += l;
4089 access_len -= l;
4092 if (xen_enabled()) {
4093 xen_invalidate_map_cache_entry(buffer);
4095 return;
4097 if (is_write) {
4098 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4100 qemu_vfree(bounce.buffer);
4101 bounce.buffer = NULL;
4102 cpu_notify_map_clients();
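/* Usage sketch (illustrative only; "dma_addr", "data" and "dma_len" are
   hypothetical values supplied by a device model).  If the mapping comes
   back short or fails, the caller falls back to the copying path: */
#if 0   /* example only */
static void example_dma_to_guest(target_phys_addr_t dma_addr,
                                 uint8_t *data,
                                 target_phys_addr_t dma_len)
{
    target_phys_addr_t plen = dma_len;
    void *host = cpu_physical_memory_map(dma_addr, &plen, 1 /* is_write */);

    if (host && plen == dma_len) {
        memcpy(host, data, plen);                   /* zero-copy fast path */
        cpu_physical_memory_unmap(host, plen, 1, plen);
    } else {
        if (host) {
            cpu_physical_memory_unmap(host, plen, 1, 0);
        }
        /* slow path; may go through the bounce buffer internally */
        cpu_physical_memory_rw(dma_addr, data, dma_len, 1);
    }
}
#endif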
4105 /* warning: addr must be aligned */
4106 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4107 enum device_endian endian)
4109 int io_index;
4110 uint8_t *ptr;
4111 uint32_t val;
4112 MemoryRegionSection *section;
4114 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4116 if (!is_ram_rom_romd(section)) {
4117 /* I/O case */
4118 io_index = memory_region_get_ram_addr(section->mr)
4119 & (IO_MEM_NB_ENTRIES - 1);
4120 addr = section_addr(section, addr);
4121 val = io_mem_read(io_index, addr, 4);
4122 #if defined(TARGET_WORDS_BIGENDIAN)
4123 if (endian == DEVICE_LITTLE_ENDIAN) {
4124 val = bswap32(val);
4126 #else
4127 if (endian == DEVICE_BIG_ENDIAN) {
4128 val = bswap32(val);
4130 #endif
4131 } else {
4132 /* RAM case */
4133 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4134 & TARGET_PAGE_MASK)
4135 + section_addr(section, addr));
4136 switch (endian) {
4137 case DEVICE_LITTLE_ENDIAN:
4138 val = ldl_le_p(ptr);
4139 break;
4140 case DEVICE_BIG_ENDIAN:
4141 val = ldl_be_p(ptr);
4142 break;
4143 default:
4144 val = ldl_p(ptr);
4145 break;
4148 return val;
4151 uint32_t ldl_phys(target_phys_addr_t addr)
4153 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4156 uint32_t ldl_le_phys(target_phys_addr_t addr)
4158 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4161 uint32_t ldl_be_phys(target_phys_addr_t addr)
4163 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4166 /* warning: addr must be aligned */
4167 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4168 enum device_endian endian)
4170 int io_index;
4171 uint8_t *ptr;
4172 uint64_t val;
4173 MemoryRegionSection *section;
4175 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4177 if (!is_ram_rom_romd(section)) {
4178 /* I/O case */
4179 io_index = memory_region_get_ram_addr(section->mr)
4180 & (IO_MEM_NB_ENTRIES - 1);
4181 addr = section_addr(section, addr);
4183 /* XXX This is broken when device endian != cpu endian.
4184 Fix and add "endian" variable check */
4185 #ifdef TARGET_WORDS_BIGENDIAN
4186 val = io_mem_read(io_index, addr, 4) << 32;
4187 val |= io_mem_read(io_index, addr + 4, 4);
4188 #else
4189 val = io_mem_read(io_index, addr, 4);
4190 val |= io_mem_read(io_index, addr + 4, 4) << 32;
4191 #endif
4192 } else {
4193 /* RAM case */
4194 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4195 & TARGET_PAGE_MASK)
4196 + section_addr(section, addr));
4197 switch (endian) {
4198 case DEVICE_LITTLE_ENDIAN:
4199 val = ldq_le_p(ptr);
4200 break;
4201 case DEVICE_BIG_ENDIAN:
4202 val = ldq_be_p(ptr);
4203 break;
4204 default:
4205 val = ldq_p(ptr);
4206 break;
4209 return val;
4212 uint64_t ldq_phys(target_phys_addr_t addr)
4214 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4217 uint64_t ldq_le_phys(target_phys_addr_t addr)
4219 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4222 uint64_t ldq_be_phys(target_phys_addr_t addr)
4224 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4227 /* XXX: optimize */
4228 uint32_t ldub_phys(target_phys_addr_t addr)
4230 uint8_t val;
4231 cpu_physical_memory_read(addr, &val, 1);
4232 return val;

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!is_ram_rom_romd(section)) {
        /* I/O case */
        io_index = memory_region_get_ram_addr(section->mr)
            & (IO_MEM_NB_ENTRIES - 1);
        addr = section_addr(section, addr);
        val = io_mem_read(io_index, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        if (memory_region_is_ram(section->mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section->mr);
        }
        addr = section_addr(section, addr);
        io_mem_write(io_index, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                               & TARGET_PAGE_MASK)
            + section_addr(section, addr);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}
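
/*
 * A sketch of why the _notdirty store matters (the address and value are
 * hypothetical): target page-table walkers rely on dirty bits to detect
 * guest-modified pages, so they update PTE status bits with e.g.
 *
 *     stl_phys_notdirty(pte_addr, pte | PG_ACCESSED_MASK);
 *
 * which rewrites the PTE without dirtying the page, keeping dirty
 * tracking limited to guest-initiated modifications. PG_ACCESSED_MASK
 * here is the x86 accessed bit; other targets use their own flags.
 */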

void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        if (memory_region_is_ram(section->mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section->mr)
                & (IO_MEM_NB_ENTRIES - 1);
        }
        addr = section_addr(section, addr);
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(io_index, addr, val >> 32, 4);
        io_mem_write(io_index, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(io_index, addr, (uint32_t)val, 4);
        io_mem_write(io_index, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + section_addr(section, addr));
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        if (memory_region_is_ram(section->mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section->mr)
                & (IO_MEM_NB_ENTRIES - 1);
        }
        addr = section_addr(section, addr);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(io_index, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
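
/*
 * Illustrative counterpart to the loads above (hypothetical address):
 *
 *     stl_be_phys(0x10000004, 0x1);    (32-bit big-endian store)
 *
 * Unlike the _notdirty variant, the RAM path of stl_phys_internal also
 * invalidates any TBs translated from the written page and sets its
 * dirty flags, so self-modifying guest code stays coherent.
 */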

/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        if (memory_region_is_ram(section->mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section->mr)
                & (IO_MEM_NB_ENTRIES - 1);
        }
        addr = section_addr(section, addr);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(io_index, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
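
/*
 * The 64-bit stores above have no dedicated I/O fast path yet (hence the
 * XXX): they swap the value into the desired byte order in a local, then
 * hand it to cpu_physical_memory_write() as a plain byte buffer.
 */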

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
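
/*
 * This is the accessor used by debug consumers such as the gdb stub. It
 * walks one page at a time because a virtually contiguous buffer may span
 * physically discontiguous pages. Illustrative read of 16 bytes from a
 * hypothetical guest virtual address:
 *
 *     uint8_t buf[16];
 *     if (cpu_memory_rw_debug(env, 0x400000, buf, sizeof(buf), 0) < 0) {
 *         // some page in the range has no mapping
 *     }
 */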

#endif

/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUState *env, void *retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc((unsigned long)retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, (unsigned long)retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}
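
/*
 * In icount mode the faulting TB is retranslated with CF_LAST_IO so that
 * the I/O instruction becomes the final one in the block. cflags packs
 * both the instruction budget n and the CF_LAST_IO flag into one word,
 * which is why n must fit within CF_COUNT_MASK.
 */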

#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for (i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ?
                (double) (code_gen_ptr - code_gen_buffer) / target_code_size
                : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}

/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
   is the offset relative to phys_ram_base */
tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
{
    int mmu_idx, page_index, pd;
    void *p;

    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env1);
    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                 (addr & TARGET_PAGE_MASK))) {
        ldub_code(addr);
    }
    pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
    if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
        && !io_mem_region[pd]->rom_device) {
#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
        cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
#else
        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
                  TARGET_FMT_lx "\n", addr);
#endif
    }
    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
    return qemu_ram_addr_from_host_nofail(p);
}
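
/*
 * The ldub_code() above is a deliberate side effect: it forces a softmmu
 * code-TLB fill for the page, so the addr_code and addend fields read
 * afterwards are guaranteed to describe a valid mapping for the
 * host-address translation below.
 */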

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#define MMUSUFFIX _cmmu
#undef GETPC
#define GETPC() NULL
#define env cpu_single_env
#define SOFTMMU_CODE_ACCESS
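
/*
 * Each inclusion of softmmu_template.h instantiates the code-access
 * (_cmmu) load helpers for one access size: SHIFT n produces the
 * (1 << n)-byte variant, i.e. 1, 2, 4 and 8 bytes below.
 */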

#define SHIFT 0
#include "softmmu_template.h"

#define SHIFT 1
#include "softmmu_template.h"

#define SHIFT 2
#include "softmmu_template.h"

#define SHIFT 3
#include "softmmu_template.h"

#undef env

#endif