memory: give phys_page_find() its own tree search loop
[qemu/ar7.git] / exec.c
blob 24423d5cb6afbafbee88b09645cd64570abc44af
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
122 static MemoryRegion io_mem_subpage_ram;
124 #endif
126 CPUState *first_cpu;
127 /* current CPU in the current thread. It is only valid inside
128 cpu_exec() */
129 DEFINE_TLS(CPUState *,cpu_single_env);
130 /* 0 = Do not count executed instructions.
131 1 = Precise instruction counting.
132 2 = Adaptive rate instruction counting. */
133 int use_icount = 0;
135 typedef struct PageDesc {
136 /* list of TBs intersecting this ram page */
137 TranslationBlock *first_tb;
138 /* in order to optimize self-modifying code, we count the number
139 of lookups we do to a given page to use a bitmap */
140 unsigned int code_write_count;
141 uint8_t *code_bitmap;
142 #if defined(CONFIG_USER_ONLY)
143 unsigned long flags;
144 #endif
145 } PageDesc;
147 /* In system mode we want L1_MAP to be based on ram offsets,
148 while in user mode we want it to be based on virtual addresses. */
149 #if !defined(CONFIG_USER_ONLY)
150 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
151 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
154 #endif
155 #else
156 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
157 #endif
159 /* Size of the L2 (and L3, etc) page tables. */
160 #define L2_BITS 10
161 #define L2_SIZE (1 << L2_BITS)
163 #define P_L2_LEVELS \
164 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
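/* For example, with TARGET_PHYS_ADDR_SPACE_BITS = 64, TARGET_PAGE_BITS = 12
   and L2_BITS = 10, this works out to ((64 - 12 - 1) / 10) + 1 = 6 levels. */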
166 /* The bits remaining after N lower levels of page tables. */
167 #define V_L1_BITS_REM \
168 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
178 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
180 unsigned long qemu_real_host_page_size;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageEntry PhysPageEntry;
191 static MemoryRegionSection *phys_sections;
192 static unsigned phys_sections_nb, phys_sections_nb_alloc;
193 static uint16_t phys_section_unassigned;
195 struct PhysPageEntry {
196 union {
197 uint16_t leaf; /* index into phys_sections */
198 uint16_t node; /* index into phys_map_nodes */
199 } u;
202 /* Simple allocator for PhysPageEntry nodes */
203 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
204 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
206 #define PHYS_MAP_NODE_NIL ((uint16_t)~0)
208 /* This is a multi-level map on the physical address space.
209 The bottom level has pointers to MemoryRegionSections. */
210 static PhysPageEntry phys_map = { .u.node = PHYS_MAP_NODE_NIL };
212 static void io_mem_init(void);
213 static void memory_map_init(void);
215 /* io memory support */
216 MemoryRegion *io_mem_region[IO_MEM_NB_ENTRIES];
217 static char io_mem_used[IO_MEM_NB_ENTRIES];
218 static MemoryRegion io_mem_watch;
219 #endif
221 /* log support */
222 #ifdef WIN32
223 static const char *logfilename = "qemu.log";
224 #else
225 static const char *logfilename = "/tmp/qemu.log";
226 #endif
227 FILE *logfile;
228 int loglevel;
229 static int log_append = 0;
231 /* statistics */
232 #if !defined(CONFIG_USER_ONLY)
233 static int tlb_flush_count;
234 #endif
235 static int tb_flush_count;
236 static int tb_phys_invalidate_count;
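/* Mark a host memory range as executable so generated code can run from it. */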
238 #ifdef _WIN32
239 static void map_exec(void *addr, long size)
241 DWORD old_protect;
242 VirtualProtect(addr, size,
243 PAGE_EXECUTE_READWRITE, &old_protect);
246 #else
247 static void map_exec(void *addr, long size)
249 unsigned long start, end, page_size;
251 page_size = getpagesize();
252 start = (unsigned long)addr;
253 start &= ~(page_size - 1);
255 end = (unsigned long)addr + size;
256 end += page_size - 1;
257 end &= ~(page_size - 1);
259 mprotect((void *)start, end - start,
260 PROT_READ | PROT_WRITE | PROT_EXEC);
262 #endif
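/* Determine the host and target page sizes.  In BSD user mode this also
   walks the existing host mappings and marks them PAGE_RESERVED so the
   guest cannot map over them. */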
264 static void page_init(void)
266 /* NOTE: we can always suppose that qemu_host_page_size >=
267 TARGET_PAGE_SIZE */
268 #ifdef _WIN32
270 SYSTEM_INFO system_info;
272 GetSystemInfo(&system_info);
273 qemu_real_host_page_size = system_info.dwPageSize;
275 #else
276 qemu_real_host_page_size = getpagesize();
277 #endif
278 if (qemu_host_page_size == 0)
279 qemu_host_page_size = qemu_real_host_page_size;
280 if (qemu_host_page_size < TARGET_PAGE_SIZE)
281 qemu_host_page_size = TARGET_PAGE_SIZE;
282 qemu_host_page_mask = ~(qemu_host_page_size - 1);
284 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
286 #ifdef HAVE_KINFO_GETVMMAP
287 struct kinfo_vmentry *freep;
288 int i, cnt;
290 freep = kinfo_getvmmap(getpid(), &cnt);
291 if (freep) {
292 mmap_lock();
293 for (i = 0; i < cnt; i++) {
294 unsigned long startaddr, endaddr;
296 startaddr = freep[i].kve_start;
297 endaddr = freep[i].kve_end;
298 if (h2g_valid(startaddr)) {
299 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
301 if (h2g_valid(endaddr)) {
302 endaddr = h2g(endaddr);
303 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
304 } else {
305 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
306 endaddr = ~0ul;
307 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
308 #endif
312 free(freep);
313 mmap_unlock();
315 #else
316 FILE *f;
318 last_brk = (unsigned long)sbrk(0);
320 f = fopen("/compat/linux/proc/self/maps", "r");
321 if (f) {
322 mmap_lock();
324 do {
325 unsigned long startaddr, endaddr;
326 int n;
328 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
330 if (n == 2 && h2g_valid(startaddr)) {
331 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
333 if (h2g_valid(endaddr)) {
334 endaddr = h2g(endaddr);
335 } else {
336 endaddr = ~0ul;
338 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
340 } while (!feof(f));
342 fclose(f);
343 mmap_unlock();
345 #endif
347 #endif
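/* Walk the multi-level l1_map for the given page index.  Missing levels
   and the leaf PageDesc array are allocated when 'alloc' is non-zero;
   otherwise NULL is returned for an absent entry. */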
350 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
352 PageDesc *pd;
353 void **lp;
354 int i;
356 #if defined(CONFIG_USER_ONLY)
357 /* We can't use g_malloc because it may recurse into a locked mutex. */
358 # define ALLOC(P, SIZE) \
359 do { \
360 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
361 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
362 } while (0)
363 #else
364 # define ALLOC(P, SIZE) \
365 do { P = g_malloc0(SIZE); } while (0)
366 #endif
368 /* Level 1. Always allocated. */
369 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
371 /* Level 2..N-1. */
372 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
373 void **p = *lp;
375 if (p == NULL) {
376 if (!alloc) {
377 return NULL;
379 ALLOC(p, sizeof(void *) * L2_SIZE);
380 *lp = p;
383 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
386 pd = *lp;
387 if (pd == NULL) {
388 if (!alloc) {
389 return NULL;
391 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
392 *lp = pd;
395 #undef ALLOC
397 return pd + (index & (L2_SIZE - 1));
400 static inline PageDesc *page_find(tb_page_addr_t index)
402 return page_find_alloc(index, 0);
405 #if !defined(CONFIG_USER_ONLY)
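/* Allocate a new node from the phys_map node pool, growing the pool if
   necessary.  The node's index is returned through 'ptr' and every entry
   starts out as PHYS_MAP_NODE_NIL. */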
407 static PhysPageEntry *phys_map_node_alloc(uint16_t *ptr)
409 unsigned i;
410 uint16_t ret;
412 /* Assign early to avoid the pointer being invalidated by g_renew() */
413 *ptr = ret = phys_map_nodes_nb++;
414 assert(ret != PHYS_MAP_NODE_NIL);
415 if (ret == phys_map_nodes_nb_alloc) {
416 typedef PhysPageEntry Node[L2_SIZE];
417 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
418 phys_map_nodes = g_renew(Node, phys_map_nodes,
419 phys_map_nodes_nb_alloc);
421 for (i = 0; i < L2_SIZE; ++i) {
422 phys_map_nodes[ret][i].u.node = PHYS_MAP_NODE_NIL;
424 return phys_map_nodes[ret];
427 static void phys_map_nodes_reset(void)
429 phys_map_nodes_nb = 0;
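/* Descend the physical page radix tree for 'index'.  With 'alloc' set,
   missing nodes are created and new bottom-level entries default to
   phys_section_unassigned; without it, NULL is returned when the path does
   not exist.  The result points at the leaf, an index into phys_sections. */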
432 static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
434 PhysPageEntry *lp, *p;
435 int i, j;
437 lp = &phys_map;
439 /* Level 1..N. */
440 for (i = P_L2_LEVELS - 1; i >= 0; i--) {
441 if (lp->u.node == PHYS_MAP_NODE_NIL) {
442 if (!alloc) {
443 return NULL;
445 p = phys_map_node_alloc(&lp->u.node);
446 if (i == 0) {
447 for (j = 0; j < L2_SIZE; j++) {
448 p[j].u.leaf = phys_section_unassigned;
451 } else {
452 p = phys_map_nodes[lp->u.node];
454 lp = &p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
457 return &lp->u.leaf;
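/* Lookup-only counterpart with its own tree search loop (the subject of
   this commit): walk the tree for 'index' and return a MemoryRegionSection
   adjusted to start at the requested page.  Addresses with no mapping fall
   back to the unassigned section. */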
460 static MemoryRegionSection phys_page_find(target_phys_addr_t index)
462 PhysPageEntry lp = phys_map;
463 PhysPageEntry *p;
464 int i;
465 MemoryRegionSection section;
466 target_phys_addr_t delta;
467 uint16_t s_index = phys_section_unassigned;
469 for (i = P_L2_LEVELS - 1; i >= 0; i--) {
470 if (lp.u.node == PHYS_MAP_NODE_NIL) {
471 goto not_found;
473 p = phys_map_nodes[lp.u.node];
474 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
477 s_index = lp.u.leaf;
478 not_found:
479 section = phys_sections[s_index];
480 index <<= TARGET_PAGE_BITS;
481 assert(section.offset_within_address_space <= index
482 && index <= section.offset_within_address_space + section.size-1);
483 delta = index - section.offset_within_address_space;
484 section.offset_within_address_space += delta;
485 section.offset_within_region += delta;
486 section.size -= delta;
487 return section;
490 static void tlb_protect_code(ram_addr_t ram_addr);
491 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
492 target_ulong vaddr);
493 #define mmap_lock() do { } while(0)
494 #define mmap_unlock() do { } while(0)
495 #endif
497 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
499 #if defined(CONFIG_USER_ONLY)
500 /* Currently it is not recommended to allocate big chunks of data in
501 user mode. This will change once a dedicated libc is used. */
502 #define USE_STATIC_CODE_GEN_BUFFER
503 #endif
505 #ifdef USE_STATIC_CODE_GEN_BUFFER
506 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
507 __attribute__((aligned (CODE_GEN_ALIGN)));
508 #endif
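/* Allocate the translation code buffer (or reuse the static one), make it
   executable and derive code_gen_max_blocks from its size.  On several
   hosts the buffer must be placed so that direct branches can reach the
   prologue and other translated blocks. */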
510 static void code_gen_alloc(unsigned long tb_size)
512 #ifdef USE_STATIC_CODE_GEN_BUFFER
513 code_gen_buffer = static_code_gen_buffer;
514 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
515 map_exec(code_gen_buffer, code_gen_buffer_size);
516 #else
517 code_gen_buffer_size = tb_size;
518 if (code_gen_buffer_size == 0) {
519 #if defined(CONFIG_USER_ONLY)
520 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
521 #else
522 /* XXX: needs adjustments */
523 code_gen_buffer_size = (unsigned long)(ram_size / 4);
524 #endif
526 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
527 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
528 /* The code gen buffer location may have constraints depending on
529 the host cpu and OS */
530 #if defined(__linux__)
532 int flags;
533 void *start = NULL;
535 flags = MAP_PRIVATE | MAP_ANONYMOUS;
536 #if defined(__x86_64__)
537 flags |= MAP_32BIT;
538 /* Cannot map more than that */
539 if (code_gen_buffer_size > (800 * 1024 * 1024))
540 code_gen_buffer_size = (800 * 1024 * 1024);
541 #elif defined(__sparc_v9__)
542 // Map the buffer below 2G, so we can use direct calls and branches
543 flags |= MAP_FIXED;
544 start = (void *) 0x60000000UL;
545 if (code_gen_buffer_size > (512 * 1024 * 1024))
546 code_gen_buffer_size = (512 * 1024 * 1024);
547 #elif defined(__arm__)
548 /* Keep the buffer no bigger than 16MB to branch between blocks */
549 if (code_gen_buffer_size > 16 * 1024 * 1024)
550 code_gen_buffer_size = 16 * 1024 * 1024;
551 #elif defined(__s390x__)
552 /* Map the buffer so that we can use direct calls and branches. */
553 /* We have a +- 4GB range on the branches; leave some slop. */
554 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
555 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
557 start = (void *)0x90000000UL;
558 #endif
559 code_gen_buffer = mmap(start, code_gen_buffer_size,
560 PROT_WRITE | PROT_READ | PROT_EXEC,
561 flags, -1, 0);
562 if (code_gen_buffer == MAP_FAILED) {
563 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
564 exit(1);
567 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
568 || defined(__DragonFly__) || defined(__OpenBSD__) \
569 || defined(__NetBSD__)
571 int flags;
572 void *addr = NULL;
573 flags = MAP_PRIVATE | MAP_ANONYMOUS;
574 #if defined(__x86_64__)
575 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
576 * 0x40000000 is free */
577 flags |= MAP_FIXED;
578 addr = (void *)0x40000000;
579 /* Cannot map more than that */
580 if (code_gen_buffer_size > (800 * 1024 * 1024))
581 code_gen_buffer_size = (800 * 1024 * 1024);
582 #elif defined(__sparc_v9__)
583 // Map the buffer below 2G, so we can use direct calls and branches
584 flags |= MAP_FIXED;
585 addr = (void *) 0x60000000UL;
586 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
587 code_gen_buffer_size = (512 * 1024 * 1024);
589 #endif
590 code_gen_buffer = mmap(addr, code_gen_buffer_size,
591 PROT_WRITE | PROT_READ | PROT_EXEC,
592 flags, -1, 0);
593 if (code_gen_buffer == MAP_FAILED) {
594 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
595 exit(1);
598 #else
599 code_gen_buffer = g_malloc(code_gen_buffer_size);
600 map_exec(code_gen_buffer, code_gen_buffer_size);
601 #endif
602 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
603 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
604 code_gen_buffer_max_size = code_gen_buffer_size -
605 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
606 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
607 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
610 /* Must be called before using the QEMU cpus. 'tb_size' is the size
611 (in bytes) allocated to the translation buffer. Zero means default
612 size. */
613 void tcg_exec_init(unsigned long tb_size)
615 cpu_gen_init();
616 code_gen_alloc(tb_size);
617 code_gen_ptr = code_gen_buffer;
618 page_init();
619 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
620 /* There's no guest base to take into account, so go ahead and
621 initialize the prologue now. */
622 tcg_prologue_init(&tcg_ctx);
623 #endif
626 bool tcg_enabled(void)
628 return code_gen_buffer != NULL;
631 void cpu_exec_init_all(void)
633 #if !defined(CONFIG_USER_ONLY)
634 memory_map_init();
635 io_mem_init();
636 #endif
639 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
641 static int cpu_common_post_load(void *opaque, int version_id)
643 CPUState *env = opaque;
645 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
646 version_id is increased. */
647 env->interrupt_request &= ~0x01;
648 tlb_flush(env, 1);
650 return 0;
653 static const VMStateDescription vmstate_cpu_common = {
654 .name = "cpu_common",
655 .version_id = 1,
656 .minimum_version_id = 1,
657 .minimum_version_id_old = 1,
658 .post_load = cpu_common_post_load,
659 .fields = (VMStateField []) {
660 VMSTATE_UINT32(halted, CPUState),
661 VMSTATE_UINT32(interrupt_request, CPUState),
662 VMSTATE_END_OF_LIST()
665 #endif
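/* Return the CPUState whose cpu_index equals 'cpu', or NULL if there is
   no such CPU. */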
667 CPUState *qemu_get_cpu(int cpu)
669 CPUState *env = first_cpu;
671 while (env) {
672 if (env->cpu_index == cpu)
673 break;
674 env = env->next_cpu;
677 return env;
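/* Append 'env' to the global CPU list, assign it the next free cpu_index
   and, for system emulation, register its state with savevm. */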
680 void cpu_exec_init(CPUState *env)
682 CPUState **penv;
683 int cpu_index;
685 #if defined(CONFIG_USER_ONLY)
686 cpu_list_lock();
687 #endif
688 env->next_cpu = NULL;
689 penv = &first_cpu;
690 cpu_index = 0;
691 while (*penv != NULL) {
692 penv = &(*penv)->next_cpu;
693 cpu_index++;
695 env->cpu_index = cpu_index;
696 env->numa_node = 0;
697 QTAILQ_INIT(&env->breakpoints);
698 QTAILQ_INIT(&env->watchpoints);
699 #ifndef CONFIG_USER_ONLY
700 env->thread_id = qemu_get_thread_id();
701 #endif
702 *penv = env;
703 #if defined(CONFIG_USER_ONLY)
704 cpu_list_unlock();
705 #endif
706 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
707 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
708 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
709 cpu_save, cpu_load, env);
710 #endif
713 /* Allocate a new translation block. Flush the translation buffer if
714 too many translation blocks or too much generated code. */
715 static TranslationBlock *tb_alloc(target_ulong pc)
717 TranslationBlock *tb;
719 if (nb_tbs >= code_gen_max_blocks ||
720 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
721 return NULL;
722 tb = &tbs[nb_tbs++];
723 tb->pc = pc;
724 tb->cflags = 0;
725 return tb;
728 void tb_free(TranslationBlock *tb)
730 /* In practice this is mostly used for single-use temporary TBs.
731 Ignore the hard cases and just back up if this TB happens to
732 be the last one generated. */
733 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
734 code_gen_ptr = tb->tc_ptr;
735 nb_tbs--;
739 static inline void invalidate_page_bitmap(PageDesc *p)
741 if (p->code_bitmap) {
742 g_free(p->code_bitmap);
743 p->code_bitmap = NULL;
745 p->code_write_count = 0;
748 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
750 static void page_flush_tb_1 (int level, void **lp)
752 int i;
754 if (*lp == NULL) {
755 return;
757 if (level == 0) {
758 PageDesc *pd = *lp;
759 for (i = 0; i < L2_SIZE; ++i) {
760 pd[i].first_tb = NULL;
761 invalidate_page_bitmap(pd + i);
763 } else {
764 void **pp = *lp;
765 for (i = 0; i < L2_SIZE; ++i) {
766 page_flush_tb_1 (level - 1, pp + i);
771 static void page_flush_tb(void)
773 int i;
774 for (i = 0; i < V_L1_SIZE; i++) {
775 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
779 /* flush all the translation blocks */
780 /* XXX: tb_flush is currently not thread safe */
781 void tb_flush(CPUState *env1)
783 CPUState *env;
784 #if defined(DEBUG_FLUSH)
785 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
786 (unsigned long)(code_gen_ptr - code_gen_buffer),
787 nb_tbs, nb_tbs > 0 ?
788 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
789 #endif
790 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
791 cpu_abort(env1, "Internal error: code buffer overflow\n");
793 nb_tbs = 0;
795 for(env = first_cpu; env != NULL; env = env->next_cpu) {
796 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
799 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
800 page_flush_tb();
802 code_gen_ptr = code_gen_buffer;
803 /* XXX: flush processor icache at this point if cache flush is
804 expensive */
805 tb_flush_count++;
808 #ifdef DEBUG_TB_CHECK
810 static void tb_invalidate_check(target_ulong address)
812 TranslationBlock *tb;
813 int i;
814 address &= TARGET_PAGE_MASK;
815 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
816 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
817 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
818 address >= tb->pc + tb->size)) {
819 printf("ERROR invalidate: address=" TARGET_FMT_lx
820 " PC=%08lx size=%04x\n",
821 address, (long)tb->pc, tb->size);
827 /* verify that all the pages have correct rights for code */
828 static void tb_page_check(void)
830 TranslationBlock *tb;
831 int i, flags1, flags2;
833 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
834 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
835 flags1 = page_get_flags(tb->pc);
836 flags2 = page_get_flags(tb->pc + tb->size - 1);
837 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
838 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
839 (long)tb->pc, tb->size, flags1, flags2);
845 #endif
847 /* invalidate one TB */
848 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
849 int next_offset)
851 TranslationBlock *tb1;
852 for(;;) {
853 tb1 = *ptb;
854 if (tb1 == tb) {
855 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
856 break;
858 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
862 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
864 TranslationBlock *tb1;
865 unsigned int n1;
867 for(;;) {
868 tb1 = *ptb;
869 n1 = (long)tb1 & 3;
870 tb1 = (TranslationBlock *)((long)tb1 & ~3);
871 if (tb1 == tb) {
872 *ptb = tb1->page_next[n1];
873 break;
875 ptb = &tb1->page_next[n1];
879 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
881 TranslationBlock *tb1, **ptb;
882 unsigned int n1;
884 ptb = &tb->jmp_next[n];
885 tb1 = *ptb;
886 if (tb1) {
887 /* find tb(n) in circular list */
888 for(;;) {
889 tb1 = *ptb;
890 n1 = (long)tb1 & 3;
891 tb1 = (TranslationBlock *)((long)tb1 & ~3);
892 if (n1 == n && tb1 == tb)
893 break;
894 if (n1 == 2) {
895 ptb = &tb1->jmp_first;
896 } else {
897 ptb = &tb1->jmp_next[n1];
900 /* now we can suppress tb(n) from the list */
901 *ptb = tb->jmp_next[n];
903 tb->jmp_next[n] = NULL;
907 /* reset the jump entry 'n' of a TB so that it is not chained to
908 another TB */
909 static inline void tb_reset_jump(TranslationBlock *tb, int n)
911 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
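/* Invalidate one TB: unhook it from the physical hash table, from the page
   lists (except for 'page_addr', which the caller handles itself), from the
   per-CPU jump caches, and from any TBs that chain to it. */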
914 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
916 CPUState *env;
917 PageDesc *p;
918 unsigned int h, n1;
919 tb_page_addr_t phys_pc;
920 TranslationBlock *tb1, *tb2;
922 /* remove the TB from the hash list */
923 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
924 h = tb_phys_hash_func(phys_pc);
925 tb_remove(&tb_phys_hash[h], tb,
926 offsetof(TranslationBlock, phys_hash_next));
928 /* remove the TB from the page list */
929 if (tb->page_addr[0] != page_addr) {
930 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
931 tb_page_remove(&p->first_tb, tb);
932 invalidate_page_bitmap(p);
934 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
935 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
936 tb_page_remove(&p->first_tb, tb);
937 invalidate_page_bitmap(p);
940 tb_invalidated_flag = 1;
942 /* remove the TB from the hash list */
943 h = tb_jmp_cache_hash_func(tb->pc);
944 for(env = first_cpu; env != NULL; env = env->next_cpu) {
945 if (env->tb_jmp_cache[h] == tb)
946 env->tb_jmp_cache[h] = NULL;
949 /* suppress this TB from the two jump lists */
950 tb_jmp_remove(tb, 0);
951 tb_jmp_remove(tb, 1);
953 /* suppress any remaining jumps to this TB */
954 tb1 = tb->jmp_first;
955 for(;;) {
956 n1 = (long)tb1 & 3;
957 if (n1 == 2)
958 break;
959 tb1 = (TranslationBlock *)((long)tb1 & ~3);
960 tb2 = tb1->jmp_next[n1];
961 tb_reset_jump(tb1, n1);
962 tb1->jmp_next[n1] = NULL;
963 tb1 = tb2;
965 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
967 tb_phys_invalidate_count++;
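/* Set bits [start, start + len) in the bitmap 'tab'. */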
970 static inline void set_bits(uint8_t *tab, int start, int len)
972 int end, mask, end1;
974 end = start + len;
975 tab += start >> 3;
976 mask = 0xff << (start & 7);
977 if ((start & ~7) == (end & ~7)) {
978 if (start < end) {
979 mask &= ~(0xff << (end & 7));
980 *tab |= mask;
982 } else {
983 *tab++ |= mask;
984 start = (start + 8) & ~7;
985 end1 = end & ~7;
986 while (start < end1) {
987 *tab++ = 0xff;
988 start += 8;
990 if (start < end) {
991 mask = ~(0xff << (end & 7));
992 *tab |= mask;
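/* Build the per-page bitmap of bytes covered by translated code; it lets
   tb_invalidate_phys_page_fast() skip writes that cannot touch any TB. */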
997 static void build_page_bitmap(PageDesc *p)
999 int n, tb_start, tb_end;
1000 TranslationBlock *tb;
1002 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1004 tb = p->first_tb;
1005 while (tb != NULL) {
1006 n = (long)tb & 3;
1007 tb = (TranslationBlock *)((long)tb & ~3);
1008 /* NOTE: this is subtle as a TB may span two physical pages */
1009 if (n == 0) {
1010 /* NOTE: tb_end may be after the end of the page, but
1011 it is not a problem */
1012 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1013 tb_end = tb_start + tb->size;
1014 if (tb_end > TARGET_PAGE_SIZE)
1015 tb_end = TARGET_PAGE_SIZE;
1016 } else {
1017 tb_start = 0;
1018 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1020 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1021 tb = tb->page_next[n];
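/* Translate a new TB for (pc, cs_base, flags).  If the TB array or code
   buffer is full, everything is flushed first and the second tb_alloc()
   cannot fail.  The new TB is then linked into the physical page tables. */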
1025 TranslationBlock *tb_gen_code(CPUState *env,
1026 target_ulong pc, target_ulong cs_base,
1027 int flags, int cflags)
1029 TranslationBlock *tb;
1030 uint8_t *tc_ptr;
1031 tb_page_addr_t phys_pc, phys_page2;
1032 target_ulong virt_page2;
1033 int code_gen_size;
1035 phys_pc = get_page_addr_code(env, pc);
1036 tb = tb_alloc(pc);
1037 if (!tb) {
1038 /* flush must be done */
1039 tb_flush(env);
1040 /* cannot fail at this point */
1041 tb = tb_alloc(pc);
1042 /* Don't forget to invalidate previous TB info. */
1043 tb_invalidated_flag = 1;
1045 tc_ptr = code_gen_ptr;
1046 tb->tc_ptr = tc_ptr;
1047 tb->cs_base = cs_base;
1048 tb->flags = flags;
1049 tb->cflags = cflags;
1050 cpu_gen_code(env, tb, &code_gen_size);
1051 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1053 /* check next page if needed */
1054 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1055 phys_page2 = -1;
1056 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1057 phys_page2 = get_page_addr_code(env, virt_page2);
1059 tb_link_page(tb, phys_pc, phys_page2);
1060 return tb;
1063 /* invalidate all TBs which intersect with the target physical page
1064 starting in range [start;end[. NOTE: start and end must refer to
1065 the same physical page. 'is_cpu_write_access' should be true if called
1066 from a real cpu write access: the virtual CPU will exit the current
1067 TB if code is modified inside this TB. */
1068 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1069 int is_cpu_write_access)
1071 TranslationBlock *tb, *tb_next, *saved_tb;
1072 CPUState *env = cpu_single_env;
1073 tb_page_addr_t tb_start, tb_end;
1074 PageDesc *p;
1075 int n;
1076 #ifdef TARGET_HAS_PRECISE_SMC
1077 int current_tb_not_found = is_cpu_write_access;
1078 TranslationBlock *current_tb = NULL;
1079 int current_tb_modified = 0;
1080 target_ulong current_pc = 0;
1081 target_ulong current_cs_base = 0;
1082 int current_flags = 0;
1083 #endif /* TARGET_HAS_PRECISE_SMC */
1085 p = page_find(start >> TARGET_PAGE_BITS);
1086 if (!p)
1087 return;
1088 if (!p->code_bitmap &&
1089 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1090 is_cpu_write_access) {
1091 /* build code bitmap */
1092 build_page_bitmap(p);
1095 /* we remove all the TBs in the range [start, end[ */
1096 /* XXX: see if in some cases it could be faster to invalidate all the code */
1097 tb = p->first_tb;
1098 while (tb != NULL) {
1099 n = (long)tb & 3;
1100 tb = (TranslationBlock *)((long)tb & ~3);
1101 tb_next = tb->page_next[n];
1102 /* NOTE: this is subtle as a TB may span two physical pages */
1103 if (n == 0) {
1104 /* NOTE: tb_end may be after the end of the page, but
1105 it is not a problem */
1106 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1107 tb_end = tb_start + tb->size;
1108 } else {
1109 tb_start = tb->page_addr[1];
1110 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1112 if (!(tb_end <= start || tb_start >= end)) {
1113 #ifdef TARGET_HAS_PRECISE_SMC
1114 if (current_tb_not_found) {
1115 current_tb_not_found = 0;
1116 current_tb = NULL;
1117 if (env->mem_io_pc) {
1118 /* now we have a real cpu fault */
1119 current_tb = tb_find_pc(env->mem_io_pc);
1122 if (current_tb == tb &&
1123 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1124 /* If we are modifying the current TB, we must stop
1125 its execution. We could be more precise by checking
1126 that the modification is after the current PC, but it
1127 would require a specialized function to partially
1128 restore the CPU state */
1130 current_tb_modified = 1;
1131 cpu_restore_state(current_tb, env, env->mem_io_pc);
1132 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1133 &current_flags);
1135 #endif /* TARGET_HAS_PRECISE_SMC */
1136 /* we need to do that to handle the case where a signal
1137 occurs while doing tb_phys_invalidate() */
1138 saved_tb = NULL;
1139 if (env) {
1140 saved_tb = env->current_tb;
1141 env->current_tb = NULL;
1143 tb_phys_invalidate(tb, -1);
1144 if (env) {
1145 env->current_tb = saved_tb;
1146 if (env->interrupt_request && env->current_tb)
1147 cpu_interrupt(env, env->interrupt_request);
1150 tb = tb_next;
1152 #if !defined(CONFIG_USER_ONLY)
1153 /* if no code remaining, no need to continue to use slow writes */
1154 if (!p->first_tb) {
1155 invalidate_page_bitmap(p);
1156 if (is_cpu_write_access) {
1157 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1160 #endif
1161 #ifdef TARGET_HAS_PRECISE_SMC
1162 if (current_tb_modified) {
1163 /* we generate a block containing just the instruction
1164 modifying the memory. It will ensure that it cannot modify
1165 itself */
1166 env->current_tb = NULL;
1167 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1168 cpu_resume_from_signal(env, NULL);
1170 #endif
1173 /* len must be <= 8 and start must be a multiple of len */
1174 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1176 PageDesc *p;
1177 int offset, b;
1178 #if 0
1179 if (1) {
1180 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1181 cpu_single_env->mem_io_vaddr, len,
1182 cpu_single_env->eip,
1183 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1185 #endif
1186 p = page_find(start >> TARGET_PAGE_BITS);
1187 if (!p)
1188 return;
1189 if (p->code_bitmap) {
1190 offset = start & ~TARGET_PAGE_MASK;
1191 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1192 if (b & ((1 << len) - 1))
1193 goto do_invalidate;
1194 } else {
1195 do_invalidate:
1196 tb_invalidate_phys_page_range(start, start + len, 1);
1200 #if !defined(CONFIG_SOFTMMU)
1201 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1202 unsigned long pc, void *puc)
1204 TranslationBlock *tb;
1205 PageDesc *p;
1206 int n;
1207 #ifdef TARGET_HAS_PRECISE_SMC
1208 TranslationBlock *current_tb = NULL;
1209 CPUState *env = cpu_single_env;
1210 int current_tb_modified = 0;
1211 target_ulong current_pc = 0;
1212 target_ulong current_cs_base = 0;
1213 int current_flags = 0;
1214 #endif
1216 addr &= TARGET_PAGE_MASK;
1217 p = page_find(addr >> TARGET_PAGE_BITS);
1218 if (!p)
1219 return;
1220 tb = p->first_tb;
1221 #ifdef TARGET_HAS_PRECISE_SMC
1222 if (tb && pc != 0) {
1223 current_tb = tb_find_pc(pc);
1225 #endif
1226 while (tb != NULL) {
1227 n = (long)tb & 3;
1228 tb = (TranslationBlock *)((long)tb & ~3);
1229 #ifdef TARGET_HAS_PRECISE_SMC
1230 if (current_tb == tb &&
1231 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1232 /* If we are modifying the current TB, we must stop
1233 its execution. We could be more precise by checking
1234 that the modification is after the current PC, but it
1235 would require a specialized function to partially
1236 restore the CPU state */
1238 current_tb_modified = 1;
1239 cpu_restore_state(current_tb, env, pc);
1240 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1241 &current_flags);
1243 #endif /* TARGET_HAS_PRECISE_SMC */
1244 tb_phys_invalidate(tb, addr);
1245 tb = tb->page_next[n];
1247 p->first_tb = NULL;
1248 #ifdef TARGET_HAS_PRECISE_SMC
1249 if (current_tb_modified) {
1250 /* we generate a block containing just the instruction
1251 modifying the memory. It will ensure that it cannot modify
1252 itself */
1253 env->current_tb = NULL;
1254 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1255 cpu_resume_from_signal(env, puc);
1257 #endif
1259 #endif
1261 /* add the tb in the target page and protect it if necessary */
1262 static inline void tb_alloc_page(TranslationBlock *tb,
1263 unsigned int n, tb_page_addr_t page_addr)
1265 PageDesc *p;
1266 #ifndef CONFIG_USER_ONLY
1267 bool page_already_protected;
1268 #endif
1270 tb->page_addr[n] = page_addr;
1271 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1272 tb->page_next[n] = p->first_tb;
1273 #ifndef CONFIG_USER_ONLY
1274 page_already_protected = p->first_tb != NULL;
1275 #endif
1276 p->first_tb = (TranslationBlock *)((long)tb | n);
1277 invalidate_page_bitmap(p);
1279 #if defined(TARGET_HAS_SMC) || 1
1281 #if defined(CONFIG_USER_ONLY)
1282 if (p->flags & PAGE_WRITE) {
1283 target_ulong addr;
1284 PageDesc *p2;
1285 int prot;
1287 /* force the host page as non writable (writes will have a
1288 page fault + mprotect overhead) */
1289 page_addr &= qemu_host_page_mask;
1290 prot = 0;
1291 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1292 addr += TARGET_PAGE_SIZE) {
1294 p2 = page_find (addr >> TARGET_PAGE_BITS);
1295 if (!p2)
1296 continue;
1297 prot |= p2->flags;
1298 p2->flags &= ~PAGE_WRITE;
1300 mprotect(g2h(page_addr), qemu_host_page_size,
1301 (prot & PAGE_BITS) & ~PAGE_WRITE);
1302 #ifdef DEBUG_TB_INVALIDATE
1303 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1304 page_addr);
1305 #endif
1307 #else
1308 /* if some code is already present, then the pages are already
1309 protected. So we handle the case where only the first TB is
1310 allocated in a physical page */
1311 if (!page_already_protected) {
1312 tlb_protect_code(page_addr);
1314 #endif
1316 #endif /* TARGET_HAS_SMC */
1319 /* add a new TB and link it to the physical page tables. phys_page2 is
1320 (-1) to indicate that only one page contains the TB. */
1321 void tb_link_page(TranslationBlock *tb,
1322 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1324 unsigned int h;
1325 TranslationBlock **ptb;
1327 /* Grab the mmap lock to stop another thread invalidating this TB
1328 before we are done. */
1329 mmap_lock();
1330 /* add in the physical hash table */
1331 h = tb_phys_hash_func(phys_pc);
1332 ptb = &tb_phys_hash[h];
1333 tb->phys_hash_next = *ptb;
1334 *ptb = tb;
1336 /* add in the page list */
1337 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1338 if (phys_page2 != -1)
1339 tb_alloc_page(tb, 1, phys_page2);
1340 else
1341 tb->page_addr[1] = -1;
1343 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1344 tb->jmp_next[0] = NULL;
1345 tb->jmp_next[1] = NULL;
1347 /* init original jump addresses */
1348 if (tb->tb_next_offset[0] != 0xffff)
1349 tb_reset_jump(tb, 0);
1350 if (tb->tb_next_offset[1] != 0xffff)
1351 tb_reset_jump(tb, 1);
1353 #ifdef DEBUG_TB_CHECK
1354 tb_page_check();
1355 #endif
1356 mmap_unlock();
1359 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1360 tb[1].tc_ptr. Return NULL if not found */
1361 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1363 int m_min, m_max, m;
1364 unsigned long v;
1365 TranslationBlock *tb;
1367 if (nb_tbs <= 0)
1368 return NULL;
1369 if (tc_ptr < (unsigned long)code_gen_buffer ||
1370 tc_ptr >= (unsigned long)code_gen_ptr)
1371 return NULL;
1372 /* binary search (cf Knuth) */
1373 m_min = 0;
1374 m_max = nb_tbs - 1;
1375 while (m_min <= m_max) {
1376 m = (m_min + m_max) >> 1;
1377 tb = &tbs[m];
1378 v = (unsigned long)tb->tc_ptr;
1379 if (v == tc_ptr)
1380 return tb;
1381 else if (tc_ptr < v) {
1382 m_max = m - 1;
1383 } else {
1384 m_min = m + 1;
1387 return &tbs[m_max];
1390 static void tb_reset_jump_recursive(TranslationBlock *tb);
1392 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1394 TranslationBlock *tb1, *tb_next, **ptb;
1395 unsigned int n1;
1397 tb1 = tb->jmp_next[n];
1398 if (tb1 != NULL) {
1399 /* find head of list */
1400 for(;;) {
1401 n1 = (long)tb1 & 3;
1402 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1403 if (n1 == 2)
1404 break;
1405 tb1 = tb1->jmp_next[n1];
407 /* we are now sure that tb jumps to tb1 */
1408 tb_next = tb1;
1410 /* remove tb from the jmp_first list */
1411 ptb = &tb_next->jmp_first;
1412 for(;;) {
1413 tb1 = *ptb;
1414 n1 = (long)tb1 & 3;
1415 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1416 if (n1 == n && tb1 == tb)
1417 break;
1418 ptb = &tb1->jmp_next[n1];
1420 *ptb = tb->jmp_next[n];
1421 tb->jmp_next[n] = NULL;
1423 /* suppress the jump to next tb in generated code */
1424 tb_reset_jump(tb, n);
1426 /* suppress jumps in the tb on which we could have jumped */
1427 tb_reset_jump_recursive(tb_next);
1431 static void tb_reset_jump_recursive(TranslationBlock *tb)
1433 tb_reset_jump_recursive2(tb, 0);
1434 tb_reset_jump_recursive2(tb, 1);
1437 #if defined(TARGET_HAS_ICE)
1438 #if defined(CONFIG_USER_ONLY)
1439 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1441 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1443 #else
1444 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1446 target_phys_addr_t addr;
1447 ram_addr_t ram_addr;
1448 MemoryRegionSection section;
1450 addr = cpu_get_phys_page_debug(env, pc);
1451 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1452 if (!(memory_region_is_ram(section.mr)
1453 || (section.mr->rom_device && section.mr->readable))) {
1454 return;
1456 ram_addr = (memory_region_get_ram_addr(section.mr)
1457 + section.offset_within_region) & TARGET_PAGE_MASK;
1458 ram_addr |= (pc & ~TARGET_PAGE_MASK);
1459 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1461 #endif
1462 #endif /* TARGET_HAS_ICE */
1464 #if defined(CONFIG_USER_ONLY)
1465 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1470 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1471 int flags, CPUWatchpoint **watchpoint)
1473 return -ENOSYS;
1475 #else
1476 /* Add a watchpoint. */
1477 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1478 int flags, CPUWatchpoint **watchpoint)
1480 target_ulong len_mask = ~(len - 1);
1481 CPUWatchpoint *wp;
1483 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1484 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1485 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1486 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1487 return -EINVAL;
1489 wp = g_malloc(sizeof(*wp));
1491 wp->vaddr = addr;
1492 wp->len_mask = len_mask;
1493 wp->flags = flags;
1495 /* keep all GDB-injected watchpoints in front */
1496 if (flags & BP_GDB)
1497 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1498 else
1499 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1501 tlb_flush_page(env, addr);
1503 if (watchpoint)
1504 *watchpoint = wp;
1505 return 0;
1508 /* Remove a specific watchpoint. */
1509 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1510 int flags)
1512 target_ulong len_mask = ~(len - 1);
1513 CPUWatchpoint *wp;
1515 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1516 if (addr == wp->vaddr && len_mask == wp->len_mask
1517 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1518 cpu_watchpoint_remove_by_ref(env, wp);
1519 return 0;
1522 return -ENOENT;
1525 /* Remove a specific watchpoint by reference. */
1526 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1528 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1530 tlb_flush_page(env, watchpoint->vaddr);
1532 g_free(watchpoint);
1535 /* Remove all matching watchpoints. */
1536 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1538 CPUWatchpoint *wp, *next;
1540 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1541 if (wp->flags & mask)
1542 cpu_watchpoint_remove_by_ref(env, wp);
1545 #endif
1547 /* Add a breakpoint. */
1548 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1549 CPUBreakpoint **breakpoint)
1551 #if defined(TARGET_HAS_ICE)
1552 CPUBreakpoint *bp;
1554 bp = g_malloc(sizeof(*bp));
1556 bp->pc = pc;
1557 bp->flags = flags;
1559 /* keep all GDB-injected breakpoints in front */
1560 if (flags & BP_GDB)
1561 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1562 else
1563 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1565 breakpoint_invalidate(env, pc);
1567 if (breakpoint)
1568 *breakpoint = bp;
1569 return 0;
1570 #else
1571 return -ENOSYS;
1572 #endif
1575 /* Remove a specific breakpoint. */
1576 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1578 #if defined(TARGET_HAS_ICE)
1579 CPUBreakpoint *bp;
1581 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1582 if (bp->pc == pc && bp->flags == flags) {
1583 cpu_breakpoint_remove_by_ref(env, bp);
1584 return 0;
1587 return -ENOENT;
1588 #else
1589 return -ENOSYS;
1590 #endif
1593 /* Remove a specific breakpoint by reference. */
1594 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1596 #if defined(TARGET_HAS_ICE)
1597 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1599 breakpoint_invalidate(env, breakpoint->pc);
1601 g_free(breakpoint);
1602 #endif
1605 /* Remove all matching breakpoints. */
1606 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1608 #if defined(TARGET_HAS_ICE)
1609 CPUBreakpoint *bp, *next;
1611 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1612 if (bp->flags & mask)
1613 cpu_breakpoint_remove_by_ref(env, bp);
1615 #endif
1618 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1619 CPU loop after each instruction */
1620 void cpu_single_step(CPUState *env, int enabled)
1622 #if defined(TARGET_HAS_ICE)
1623 if (env->singlestep_enabled != enabled) {
1624 env->singlestep_enabled = enabled;
1625 if (kvm_enabled())
1626 kvm_update_guest_debug(env, 0);
1627 else {
1628 /* must flush all the translated code to avoid inconsistencies */
1629 /* XXX: only flush what is necessary */
1630 tb_flush(env);
1633 #endif
1636 /* enable or disable low-level logging */
1637 void cpu_set_log(int log_flags)
1639 loglevel = log_flags;
1640 if (loglevel && !logfile) {
1641 logfile = fopen(logfilename, log_append ? "a" : "w");
1642 if (!logfile) {
1643 perror(logfilename);
1644 _exit(1);
1646 #if !defined(CONFIG_SOFTMMU)
1647 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1649 static char logfile_buf[4096];
1650 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1652 #elif defined(_WIN32)
1653 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1654 setvbuf(logfile, NULL, _IONBF, 0);
1655 #else
1656 setvbuf(logfile, NULL, _IOLBF, 0);
1657 #endif
1658 log_append = 1;
1660 if (!loglevel && logfile) {
1661 fclose(logfile);
1662 logfile = NULL;
1666 void cpu_set_log_filename(const char *filename)
1668 logfilename = strdup(filename);
1669 if (logfile) {
1670 fclose(logfile);
1671 logfile = NULL;
1673 cpu_set_log(loglevel);
1676 static void cpu_unlink_tb(CPUState *env)
1678 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1679 problem and hope the cpu will stop of its own accord. For userspace
1680 emulation this often isn't actually as bad as it sounds. Often
1681 signals are used primarily to interrupt blocking syscalls. */
1682 TranslationBlock *tb;
1683 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1685 spin_lock(&interrupt_lock);
1686 tb = env->current_tb;
1687 /* if the cpu is currently executing code, we must unlink it and
1688 all the potentially executing TB */
1689 if (tb) {
1690 env->current_tb = NULL;
1691 tb_reset_jump_recursive(tb);
1693 spin_unlock(&interrupt_lock);
1696 #ifndef CONFIG_USER_ONLY
1697 /* mask must never be zero, except for A20 change call */
1698 static void tcg_handle_interrupt(CPUState *env, int mask)
1700 int old_mask;
1702 old_mask = env->interrupt_request;
1703 env->interrupt_request |= mask;
1706 * If called from iothread context, wake the target cpu in
707 * case it's halted.
1709 if (!qemu_cpu_is_self(env)) {
1710 qemu_cpu_kick(env);
1711 return;
1714 if (use_icount) {
1715 env->icount_decr.u16.high = 0xffff;
1716 if (!can_do_io(env)
1717 && (mask & ~old_mask) != 0) {
1718 cpu_abort(env, "Raised interrupt while not in I/O function");
1720 } else {
1721 cpu_unlink_tb(env);
1725 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1727 #else /* CONFIG_USER_ONLY */
1729 void cpu_interrupt(CPUState *env, int mask)
1731 env->interrupt_request |= mask;
1732 cpu_unlink_tb(env);
1734 #endif /* CONFIG_USER_ONLY */
1736 void cpu_reset_interrupt(CPUState *env, int mask)
1738 env->interrupt_request &= ~mask;
1741 void cpu_exit(CPUState *env)
1743 env->exit_request = 1;
1744 cpu_unlink_tb(env);
1747 const CPULogItem cpu_log_items[] = {
1748 { CPU_LOG_TB_OUT_ASM, "out_asm",
1749 "show generated host assembly code for each compiled TB" },
1750 { CPU_LOG_TB_IN_ASM, "in_asm",
1751 "show target assembly code for each compiled TB" },
1752 { CPU_LOG_TB_OP, "op",
1753 "show micro ops for each compiled TB" },
1754 { CPU_LOG_TB_OP_OPT, "op_opt",
1755 "show micro ops "
1756 #ifdef TARGET_I386
1757 "before eflags optimization and "
1758 #endif
1759 "after liveness analysis" },
1760 { CPU_LOG_INT, "int",
1761 "show interrupts/exceptions in short format" },
1762 { CPU_LOG_EXEC, "exec",
1763 "show trace before each executed TB (lots of logs)" },
1764 { CPU_LOG_TB_CPU, "cpu",
1765 "show CPU state before block translation" },
1766 #ifdef TARGET_I386
1767 { CPU_LOG_PCALL, "pcall",
1768 "show protected mode far calls/returns/exceptions" },
1769 { CPU_LOG_RESET, "cpu_reset",
1770 "show CPU state before CPU resets" },
1771 #endif
1772 #ifdef DEBUG_IOPORT
1773 { CPU_LOG_IOPORT, "ioport",
1774 "show all i/o ports accesses" },
1775 #endif
1776 { 0, NULL, NULL },
1779 static int cmp1(const char *s1, int n, const char *s2)
1781 if (strlen(s2) != n)
1782 return 0;
1783 return memcmp(s1, s2, n) == 0;
1786 /* takes a comma-separated list of log masks. Returns 0 on error. */
1787 int cpu_str_to_log_mask(const char *str)
1789 const CPULogItem *item;
1790 int mask;
1791 const char *p, *p1;
1793 p = str;
1794 mask = 0;
1795 for(;;) {
1796 p1 = strchr(p, ',');
1797 if (!p1)
1798 p1 = p + strlen(p);
1799 if(cmp1(p,p1-p,"all")) {
1800 for(item = cpu_log_items; item->mask != 0; item++) {
1801 mask |= item->mask;
1803 } else {
1804 for(item = cpu_log_items; item->mask != 0; item++) {
1805 if (cmp1(p, p1 - p, item->name))
1806 goto found;
1808 return 0;
1810 found:
1811 mask |= item->mask;
1812 if (*p1 != ',')
1813 break;
1814 p = p1 + 1;
1816 return mask;
1819 void cpu_abort(CPUState *env, const char *fmt, ...)
1821 va_list ap;
1822 va_list ap2;
1824 va_start(ap, fmt);
1825 va_copy(ap2, ap);
1826 fprintf(stderr, "qemu: fatal: ");
1827 vfprintf(stderr, fmt, ap);
1828 fprintf(stderr, "\n");
1829 #ifdef TARGET_I386
1830 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1831 #else
1832 cpu_dump_state(env, stderr, fprintf, 0);
1833 #endif
1834 if (qemu_log_enabled()) {
1835 qemu_log("qemu: fatal: ");
1836 qemu_log_vprintf(fmt, ap2);
1837 qemu_log("\n");
1838 #ifdef TARGET_I386
1839 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1840 #else
1841 log_cpu_state(env, 0);
1842 #endif
1843 qemu_log_flush();
1844 qemu_log_close();
1846 va_end(ap2);
1847 va_end(ap);
1848 #if defined(CONFIG_USER_ONLY)
1850 struct sigaction act;
1851 sigfillset(&act.sa_mask);
1852 act.sa_handler = SIG_DFL;
1853 sigaction(SIGABRT, &act, NULL);
1855 #endif
1856 abort();
1859 CPUState *cpu_copy(CPUState *env)
1861 CPUState *new_env = cpu_init(env->cpu_model_str);
1862 CPUState *next_cpu = new_env->next_cpu;
1863 int cpu_index = new_env->cpu_index;
1864 #if defined(TARGET_HAS_ICE)
1865 CPUBreakpoint *bp;
1866 CPUWatchpoint *wp;
1867 #endif
1869 memcpy(new_env, env, sizeof(CPUState));
1871 /* Preserve chaining and index. */
1872 new_env->next_cpu = next_cpu;
1873 new_env->cpu_index = cpu_index;
1875 /* Clone all break/watchpoints.
1876 Note: Once we support ptrace with hw-debug register access, make sure
1877 BP_CPU break/watchpoints are handled correctly on clone. */
1878 QTAILQ_INIT(&env->breakpoints);
1879 QTAILQ_INIT(&env->watchpoints);
1880 #if defined(TARGET_HAS_ICE)
1881 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1882 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1884 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1885 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1886 wp->flags, NULL);
1888 #endif
1890 return new_env;
1893 #if !defined(CONFIG_USER_ONLY)
1895 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1897 unsigned int i;
1899 /* Discard jump cache entries for any tb which might potentially
1900 overlap the flushed page. */
1901 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1902 memset (&env->tb_jmp_cache[i], 0,
1903 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1905 i = tb_jmp_cache_hash_page(addr);
1906 memset (&env->tb_jmp_cache[i], 0,
1907 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1910 static CPUTLBEntry s_cputlb_empty_entry = {
1911 .addr_read = -1,
1912 .addr_write = -1,
1913 .addr_code = -1,
1914 .addend = -1,
1917 /* NOTE:
1918 * If flush_global is true (the usual case), flush all tlb entries.
1919 * If flush_global is false, flush (at least) all tlb entries not
1920 * marked global.
1922 * Since QEMU doesn't currently implement a global/not-global flag
1923 * for tlb entries, at the moment tlb_flush() will also flush all
1924 * tlb entries in the flush_global == false case. This is OK because
1925 * CPU architectures generally permit an implementation to drop
1926 * entries from the TLB at any time, so flushing more entries than
1927 * required is only an efficiency issue, not a correctness issue.
1929 void tlb_flush(CPUState *env, int flush_global)
1931 int i;
1933 #if defined(DEBUG_TLB)
1934 printf("tlb_flush:\n");
1935 #endif
1936 /* must reset current TB so that interrupts cannot modify the
1937 links while we are modifying them */
1938 env->current_tb = NULL;
1940 for(i = 0; i < CPU_TLB_SIZE; i++) {
1941 int mmu_idx;
1942 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1943 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1947 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1949 env->tlb_flush_addr = -1;
1950 env->tlb_flush_mask = 0;
1951 tlb_flush_count++;
1954 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1956 if (addr == (tlb_entry->addr_read &
1957 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1958 addr == (tlb_entry->addr_write &
1959 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1960 addr == (tlb_entry->addr_code &
1961 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1962 *tlb_entry = s_cputlb_empty_entry;
1966 void tlb_flush_page(CPUState *env, target_ulong addr)
1968 int i;
1969 int mmu_idx;
1971 #if defined(DEBUG_TLB)
1972 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1973 #endif
1974 /* Check if we need to flush due to large pages. */
1975 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1976 #if defined(DEBUG_TLB)
1977 printf("tlb_flush_page: forced full flush ("
1978 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
1979 env->tlb_flush_addr, env->tlb_flush_mask);
1980 #endif
1981 tlb_flush(env, 1);
1982 return;
1984 /* must reset current TB so that interrupts cannot modify the
1985 links while we are modifying them */
1986 env->current_tb = NULL;
1988 addr &= TARGET_PAGE_MASK;
1989 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
1990 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
1991 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
1993 tlb_flush_jmp_cache(env, addr);
1996 /* update the TLBs so that writes to code in the virtual page 'addr'
1997 can be detected */
1998 static void tlb_protect_code(ram_addr_t ram_addr)
2000 cpu_physical_memory_reset_dirty(ram_addr,
2001 ram_addr + TARGET_PAGE_SIZE,
2002 CODE_DIRTY_FLAG);
2005 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2006 tested for self modifying code */
2007 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2008 target_ulong vaddr)
2010 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
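/* If a TLB entry maps writes into [start, start + length), redirect them
   through the slow path by setting TLB_NOTDIRTY so the dirty bits get
   updated again. */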
2013 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2014 unsigned long start, unsigned long length)
2016 unsigned long addr;
2017 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2018 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2019 if ((addr - start) < length) {
2020 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2025 /* Note: start and end must be within the same ram block. */
2026 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2027 int dirty_flags)
2029 CPUState *env;
2030 unsigned long length, start1;
2031 int i;
2033 start &= TARGET_PAGE_MASK;
2034 end = TARGET_PAGE_ALIGN(end);
2036 length = end - start;
2037 if (length == 0)
2038 return;
2039 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2041 /* we modify the TLB cache so that the dirty bit will be set again
2042 when accessing the range */
2043 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2044 /* Check that we don't span multiple blocks - this breaks the
2045 address comparisons below. */
2046 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2047 != (end - 1) - start) {
2048 abort();
2051 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2052 int mmu_idx;
2053 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2054 for(i = 0; i < CPU_TLB_SIZE; i++)
2055 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2056 start1, length);
2061 int cpu_physical_memory_set_dirty_tracking(int enable)
2063 int ret = 0;
2064 in_migration = enable;
2065 return ret;
2068 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2070 ram_addr_t ram_addr;
2071 void *p;
2073 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2074 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2075 + tlb_entry->addend);
2076 ram_addr = qemu_ram_addr_from_host_nofail(p);
2077 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2078 tlb_entry->addr_write |= TLB_NOTDIRTY;
2083 /* update the TLB according to the current state of the dirty bits */
2084 void cpu_tlb_update_dirty(CPUState *env)
2086 int i;
2087 int mmu_idx;
2088 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2089 for(i = 0; i < CPU_TLB_SIZE; i++)
2090 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2094 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2096 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2097 tlb_entry->addr_write = vaddr;
2100 /* update the TLB corresponding to virtual page vaddr
2101 so that it is no longer dirty */
2102 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2104 int i;
2105 int mmu_idx;
2107 vaddr &= TARGET_PAGE_MASK;
2108 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2109 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2110 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2113 /* Our TLB does not support large pages, so remember the area covered by
2114 large pages and trigger a full TLB flush if these are invalidated. */
2115 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2116 target_ulong size)
2118 target_ulong mask = ~(size - 1);
2120 if (env->tlb_flush_addr == (target_ulong)-1) {
2121 env->tlb_flush_addr = vaddr & mask;
2122 env->tlb_flush_mask = mask;
2123 return;
2125 /* Extend the existing region to include the new page.
2126 This is a compromise between unnecessary flushes and the cost
2127 of maintaining a full variable size TLB. */
2128 mask &= env->tlb_flush_mask;
2129 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2130 mask <<= 1;
2132 env->tlb_flush_addr &= mask;
2133 env->tlb_flush_mask = mask;
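/* Worked example (illustrative values, assuming a 32-bit target_ulong):
 * a 4 MiB page at 0x40000000 sets tlb_flush_addr = 0x40000000 and
 * tlb_flush_mask = 0xffc00000.  Adding a second 4 MiB page at 0x40800000
 * widens the mask until both addresses agree under it:
 * 0xffc00000 -> 0xff800000 -> 0xff000000, so the tracked region becomes
 * the single 16 MiB block 0x40000000..0x40ffffff. */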
2136 static bool is_ram_rom(MemoryRegionSection *s)
2138 return memory_region_is_ram(s->mr);
2141 static bool is_romd(MemoryRegionSection *s)
2143 MemoryRegion *mr = s->mr;
2145 return mr->rom_device && mr->readable;
2148 static bool is_ram_rom_romd(MemoryRegionSection *s)
2150 return is_ram_rom(s) || is_romd(s);
2153 /* Add a new TLB entry. At most one entry for a given virtual address
2154 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2155 supplied size is only used by tlb_flush_page. */
2156 void tlb_set_page(CPUState *env, target_ulong vaddr,
2157 target_phys_addr_t paddr, int prot,
2158 int mmu_idx, target_ulong size)
2160 MemoryRegionSection section;
2161 unsigned int index;
2162 target_ulong address;
2163 target_ulong code_address;
2164 unsigned long addend;
2165 CPUTLBEntry *te;
2166 CPUWatchpoint *wp;
2167 target_phys_addr_t iotlb;
2169 assert(size >= TARGET_PAGE_SIZE);
2170 if (size != TARGET_PAGE_SIZE) {
2171 tlb_add_large_page(env, vaddr, size);
2173 section = phys_page_find(paddr >> TARGET_PAGE_BITS);
2174 #if defined(DEBUG_TLB)
2175 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2176 " prot=%x idx=%d pd=0x%08lx\n",
2177 vaddr, paddr, prot, mmu_idx, pd);
2178 #endif
2180 address = vaddr;
2181 if (!is_ram_rom_romd(&section)) {
2182 /* IO memory case (romd handled later) */
2183 address |= TLB_MMIO;
2185 if (is_ram_rom_romd(&section)) {
2186 addend = (unsigned long)(memory_region_get_ram_ptr(section.mr)
2187 + section.offset_within_region);
2188 } else {
2189 addend = 0;
2191 if (is_ram_rom(&section)) {
2192 /* Normal RAM. */
2193 iotlb = (memory_region_get_ram_addr(section.mr)
2194 + section.offset_within_region) & TARGET_PAGE_MASK;
2195 if (!section.readonly)
2196 iotlb |= io_mem_notdirty.ram_addr;
2197 else
2198 iotlb |= io_mem_rom.ram_addr;
2199 } else {
2200 /* IO handlers are currently passed a physical address.
2201 It would be nice to pass an offset from the base address
2202 of that region. This would avoid having to special case RAM,
2203 and avoid full address decoding in every device.
2204 We can't use the high bits of pd for this because
2205 IO_MEM_ROMD uses these as a ram address. */
2206 iotlb = memory_region_get_ram_addr(section.mr) & ~TARGET_PAGE_MASK;
2207 iotlb += section.offset_within_region;
2210 code_address = address;
2211 /* Make accesses to pages with watchpoints go via the
2212 watchpoint trap routines. */
2213 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2214 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2215 /* Avoid trapping reads of pages with a write breakpoint. */
2216 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2217 iotlb = io_mem_watch.ram_addr + paddr;
2218 address |= TLB_MMIO;
2219 break;
2224 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2225 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2226 te = &env->tlb_table[mmu_idx][index];
2227 te->addend = addend - vaddr;
2228 if (prot & PAGE_READ) {
2229 te->addr_read = address;
2230 } else {
2231 te->addr_read = -1;
2234 if (prot & PAGE_EXEC) {
2235 te->addr_code = code_address;
2236 } else {
2237 te->addr_code = -1;
2239 if (prot & PAGE_WRITE) {
2240 if ((memory_region_is_ram(section.mr) && section.readonly)
2241 || is_romd(&section)) {
2242 /* Write access calls the I/O callback. */
2243 te->addr_write = address | TLB_MMIO;
2244 } else if (memory_region_is_ram(section.mr)
2245 && !cpu_physical_memory_is_dirty(
2246 section.mr->ram_addr
2247 + section.offset_within_region)) {
2248 te->addr_write = address | TLB_NOTDIRTY;
2249 } else {
2250 te->addr_write = address;
2252 } else {
2253 te->addr_write = -1;
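#if 0
/* Illustrative sketch only (not any real target's code): a target's
 * tlb_fill()-style fault handler translates the faulting address and then
 * installs a single page mapping with tlb_set_page().  example_translate()
 * is a hypothetical MMU walker. */
static void example_tlb_fill(CPUState *env, target_ulong vaddr, int mmu_idx)
{
    target_phys_addr_t paddr;
    int prot;

    example_translate(env, vaddr, &paddr, &prot);

    /* Only one TARGET_PAGE_SIZE entry is installed; the size argument just
       lets tlb_flush_page() know the mapping came from a large page. */
    tlb_set_page(env, vaddr & TARGET_PAGE_MASK, paddr & TARGET_PAGE_MASK,
                 prot, mmu_idx, TARGET_PAGE_SIZE);
}
#endif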
2257 #else
2259 void tlb_flush(CPUState *env, int flush_global)
2263 void tlb_flush_page(CPUState *env, target_ulong addr)
2268 * Walks guest process memory "regions" one by one
2269 * and calls callback function 'fn' for each region.
2272 struct walk_memory_regions_data
2274 walk_memory_regions_fn fn;
2275 void *priv;
2276 unsigned long start;
2277 int prot;
2280 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2281 abi_ulong end, int new_prot)
2283 if (data->start != -1ul) {
2284 int rc = data->fn(data->priv, data->start, end, data->prot);
2285 if (rc != 0) {
2286 return rc;
2290 data->start = (new_prot ? end : -1ul);
2291 data->prot = new_prot;
2293 return 0;
2296 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2297 abi_ulong base, int level, void **lp)
2299 abi_ulong pa;
2300 int i, rc;
2302 if (*lp == NULL) {
2303 return walk_memory_regions_end(data, base, 0);
2306 if (level == 0) {
2307 PageDesc *pd = *lp;
2308 for (i = 0; i < L2_SIZE; ++i) {
2309 int prot = pd[i].flags;
2311 pa = base | (i << TARGET_PAGE_BITS);
2312 if (prot != data->prot) {
2313 rc = walk_memory_regions_end(data, pa, prot);
2314 if (rc != 0) {
2315 return rc;
2319 } else {
2320 void **pp = *lp;
2321 for (i = 0; i < L2_SIZE; ++i) {
2322 pa = base | ((abi_ulong)i <<
2323 (TARGET_PAGE_BITS + L2_BITS * level));
2324 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2325 if (rc != 0) {
2326 return rc;
2331 return 0;
2334 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2336 struct walk_memory_regions_data data;
2337 unsigned long i;
2339 data.fn = fn;
2340 data.priv = priv;
2341 data.start = -1ul;
2342 data.prot = 0;
2344 for (i = 0; i < V_L1_SIZE; i++) {
2345 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2346 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2347 if (rc != 0) {
2348 return rc;
2352 return walk_memory_regions_end(&data, 0, 0);
2355 static int dump_region(void *priv, abi_ulong start,
2356 abi_ulong end, unsigned long prot)
2358 FILE *f = (FILE *)priv;
2360 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2361 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2362 start, end, end - start,
2363 ((prot & PAGE_READ) ? 'r' : '-'),
2364 ((prot & PAGE_WRITE) ? 'w' : '-'),
2365 ((prot & PAGE_EXEC) ? 'x' : '-'));
2367 return (0);
2370 /* dump memory mappings */
2371 void page_dump(FILE *f)
2373 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2374 "start", "end", "size", "prot");
2375 walk_memory_regions(f, dump_region);
2378 int page_get_flags(target_ulong address)
2380 PageDesc *p;
2382 p = page_find(address >> TARGET_PAGE_BITS);
2383 if (!p)
2384 return 0;
2385 return p->flags;
2388 /* Modify the flags of a page and invalidate the code if necessary.
2389 The flag PAGE_WRITE_ORG is set automatically depending
2390 on PAGE_WRITE. The mmap_lock should already be held. */
2391 void page_set_flags(target_ulong start, target_ulong end, int flags)
2393 target_ulong addr, len;
2395 /* This function should never be called with addresses outside the
2396 guest address space. If this assert fires, it probably indicates
2397 a missing call to h2g_valid. */
2398 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2399 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2400 #endif
2401 assert(start < end);
2403 start = start & TARGET_PAGE_MASK;
2404 end = TARGET_PAGE_ALIGN(end);
2406 if (flags & PAGE_WRITE) {
2407 flags |= PAGE_WRITE_ORG;
2410 for (addr = start, len = end - start;
2411 len != 0;
2412 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2413 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2415 /* If the write protection bit is set, then we invalidate
2416 the code inside. */
2417 if (!(p->flags & PAGE_WRITE) &&
2418 (flags & PAGE_WRITE) &&
2419 p->first_tb) {
2420 tb_invalidate_phys_page(addr, 0, NULL);
2422 p->flags = flags;
2426 int page_check_range(target_ulong start, target_ulong len, int flags)
2428 PageDesc *p;
2429 target_ulong end;
2430 target_ulong addr;
2432 /* This function should never be called with addresses outside the
2433 guest address space. If this assert fires, it probably indicates
2434 a missing call to h2g_valid. */
2435 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2436 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2437 #endif
2439 if (len == 0) {
2440 return 0;
2442 if (start + len - 1 < start) {
2443 /* We've wrapped around. */
2444 return -1;
2447 end = TARGET_PAGE_ALIGN(start+len); /* must do this before we lose bits in the next step */
2448 start = start & TARGET_PAGE_MASK;
2450 for (addr = start, len = end - start;
2451 len != 0;
2452 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2453 p = page_find(addr >> TARGET_PAGE_BITS);
2454 if( !p )
2455 return -1;
2456 if( !(p->flags & PAGE_VALID) )
2457 return -1;
2459 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2460 return -1;
2461 if (flags & PAGE_WRITE) {
2462 if (!(p->flags & PAGE_WRITE_ORG))
2463 return -1;
2464 /* unprotect the page if it was put read-only because it
2465 contains translated code */
2466 if (!(p->flags & PAGE_WRITE)) {
2467 if (!page_unprotect(addr, 0, NULL))
2468 return -1;
2470 return 0;
2473 return 0;
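#if 0
/* Illustrative sketch (user-mode emulation): verify that a guest buffer is
 * readable before copying from it.  guest_addr and len are hypothetical. */
static int example_check_guest_buffer(target_ulong guest_addr, target_ulong len)
{
    if (page_check_range(guest_addr, len, PAGE_READ) < 0) {
        return -1;  /* would typically become an EFAULT-style error */
    }
    return 0;
}
#endif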
2476 /* called from signal handler: invalidate the code and unprotect the
2477 page. Return TRUE if the fault was successfully handled. */
2478 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2480 unsigned int prot;
2481 PageDesc *p;
2482 target_ulong host_start, host_end, addr;
2484 /* Technically this isn't safe inside a signal handler. However we
2485 know this only ever happens in a synchronous SEGV handler, so in
2486 practice it seems to be ok. */
2487 mmap_lock();
2489 p = page_find(address >> TARGET_PAGE_BITS);
2490 if (!p) {
2491 mmap_unlock();
2492 return 0;
2495 /* if the page was really writable, then we change its
2496 protection back to writable */
2497 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2498 host_start = address & qemu_host_page_mask;
2499 host_end = host_start + qemu_host_page_size;
2501 prot = 0;
2502 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2503 p = page_find(addr >> TARGET_PAGE_BITS);
2504 p->flags |= PAGE_WRITE;
2505 prot |= p->flags;
2507 /* and since the content will be modified, we must invalidate
2508 the corresponding translated code. */
2509 tb_invalidate_phys_page(addr, pc, puc);
2510 #ifdef DEBUG_TB_CHECK
2511 tb_invalidate_check(addr);
2512 #endif
2514 mprotect((void *)g2h(host_start), qemu_host_page_size,
2515 prot & PAGE_BITS);
2517 mmap_unlock();
2518 return 1;
2520 mmap_unlock();
2521 return 0;
2524 static inline void tlb_set_dirty(CPUState *env,
2525 unsigned long addr, target_ulong vaddr)
2528 #endif /* defined(CONFIG_USER_ONLY) */
2530 #if !defined(CONFIG_USER_ONLY)
2532 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2533 typedef struct subpage_t {
2534 MemoryRegion iomem;
2535 target_phys_addr_t base;
2536 uint16_t sub_section[TARGET_PAGE_SIZE];
2537 } subpage_t;
2539 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2540 uint16_t section);
2541 static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section,
2542 uint16_t orig_section);
2543 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2544 need_subpage) \
2545 do { \
2546 if (addr > start_addr) \
2547 start_addr2 = 0; \
2548 else { \
2549 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2550 if (start_addr2 > 0) \
2551 need_subpage = 1; \
2554 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2555 end_addr2 = TARGET_PAGE_SIZE - 1; \
2556 else { \
2557 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2558 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2559 need_subpage = 1; \
2561 } while (0)
2563 static void destroy_page_desc(uint16_t section_index)
2565 MemoryRegionSection *section = &phys_sections[section_index];
2566 MemoryRegion *mr = section->mr;
2568 if (mr->subpage) {
2569 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2570 memory_region_destroy(&subpage->iomem);
2571 g_free(subpage);
2575 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2577 unsigned i;
2578 PhysPageEntry *p;
2580 if (lp->u.node == PHYS_MAP_NODE_NIL) {
2581 return;
2584 p = phys_map_nodes[lp->u.node];
2585 for (i = 0; i < L2_SIZE; ++i) {
2586 if (level > 0) {
2587 destroy_l2_mapping(&p[i], level - 1);
2588 } else {
2589 destroy_page_desc(p[i].u.leaf);
2592 lp->u.node = PHYS_MAP_NODE_NIL;
2595 static void destroy_all_mappings(void)
2597 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2598 phys_map_nodes_reset();
2601 static uint16_t phys_section_add(MemoryRegionSection *section)
2603 if (phys_sections_nb == phys_sections_nb_alloc) {
2604 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2605 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2606 phys_sections_nb_alloc);
2608 phys_sections[phys_sections_nb] = *section;
2609 return phys_sections_nb++;
2612 static void phys_sections_clear(void)
2614 phys_sections_nb = 0;
2617 /* Register the physical memory range described by a MemoryRegionSection
2618 in the physical page map. For RAM, the section size must be a
2619 multiple of the target page size.
2620 Ranges that do not start or end on a page boundary are covered by
2621 subpage mappings, so that several sections can share a single
2622 target page; the address passed to the IO callbacks is then the
2623 offset from the start of the section's region (see the subpage
2624 handlers further down in this file). */
2625 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2626 bool readonly)
2628 target_phys_addr_t start_addr = section->offset_within_address_space;
2629 ram_addr_t size = section->size;
2630 target_phys_addr_t addr, end_addr;
2631 ram_addr_t orig_size = size;
2632 subpage_t *subpage;
2633 uint16_t section_index = phys_section_add(section);
2635 assert(size);
2637 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2638 end_addr = start_addr + (target_phys_addr_t)size;
2640 addr = start_addr;
2641 do {
2642 uint16_t *p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2643 uint16_t orig_memory = *p;
2644 target_phys_addr_t start_addr2, end_addr2;
2645 int need_subpage = 0;
2646 MemoryRegion *mr = phys_sections[orig_memory].mr;
2648 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2649 need_subpage);
2650 if (need_subpage) {
2651 if (!(mr->subpage)) {
2652 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2653 p, orig_memory);
2654 } else {
2655 subpage = container_of(mr, subpage_t, iomem);
2657 subpage_register(subpage, start_addr2, end_addr2,
2658 section_index);
2659 } else {
2660 *p = section_index;
2662 addr += TARGET_PAGE_SIZE;
2663 } while (addr != end_addr);
2666 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2668 if (kvm_enabled())
2669 kvm_coalesce_mmio_region(addr, size);
2672 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2674 if (kvm_enabled())
2675 kvm_uncoalesce_mmio_region(addr, size);
2678 void qemu_flush_coalesced_mmio_buffer(void)
2680 if (kvm_enabled())
2681 kvm_flush_coalesced_mmio_buffer();
2684 #if defined(__linux__) && !defined(TARGET_S390X)
2686 #include <sys/vfs.h>
2688 #define HUGETLBFS_MAGIC 0x958458f6
2690 static long gethugepagesize(const char *path)
2692 struct statfs fs;
2693 int ret;
2695 do {
2696 ret = statfs(path, &fs);
2697 } while (ret != 0 && errno == EINTR);
2699 if (ret != 0) {
2700 perror(path);
2701 return 0;
2704 if (fs.f_type != HUGETLBFS_MAGIC)
2705 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2707 return fs.f_bsize;
2710 static void *file_ram_alloc(RAMBlock *block,
2711 ram_addr_t memory,
2712 const char *path)
2714 char *filename;
2715 void *area;
2716 int fd;
2717 #ifdef MAP_POPULATE
2718 int flags;
2719 #endif
2720 unsigned long hpagesize;
2722 hpagesize = gethugepagesize(path);
2723 if (!hpagesize) {
2724 return NULL;
2727 if (memory < hpagesize) {
2728 return NULL;
2731 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2732 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2733 return NULL;
2736 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2737 return NULL;
2740 fd = mkstemp(filename);
2741 if (fd < 0) {
2742 perror("unable to create backing store for hugepages");
2743 free(filename);
2744 return NULL;
2746 unlink(filename);
2747 free(filename);
2749 memory = (memory+hpagesize-1) & ~(hpagesize-1);
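/* e.g. with 2 MiB huge pages a 5 MiB request is rounded up to 6 MiB:
   (0x500000 + 0x1fffff) & ~0x1fffff == 0x600000 */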
2752 * ftruncate is not supported by hugetlbfs in older
2753 * hosts, so don't bother bailing out on errors.
2754 * If anything goes wrong with it under other filesystems,
2755 * mmap will fail.
2757 if (ftruncate(fd, memory))
2758 perror("ftruncate");
2760 #ifdef MAP_POPULATE
2761 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2762 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2763 * to sidestep this quirk.
2765 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2766 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2767 #else
2768 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2769 #endif
2770 if (area == MAP_FAILED) {
2771 perror("file_ram_alloc: can't mmap RAM pages");
2772 close(fd);
2773 return (NULL);
2775 block->fd = fd;
2776 return area;
2778 #endif
2780 static ram_addr_t find_ram_offset(ram_addr_t size)
2782 RAMBlock *block, *next_block;
2783 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2785 if (QLIST_EMPTY(&ram_list.blocks))
2786 return 0;
2788 QLIST_FOREACH(block, &ram_list.blocks, next) {
2789 ram_addr_t end, next = RAM_ADDR_MAX;
2791 end = block->offset + block->length;
2793 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2794 if (next_block->offset >= end) {
2795 next = MIN(next, next_block->offset);
2798 if (next - end >= size && next - end < mingap) {
2799 offset = end;
2800 mingap = next - end;
2804 if (offset == RAM_ADDR_MAX) {
2805 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2806 (uint64_t)size);
2807 abort();
2810 return offset;
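/* Worked example (illustrative offsets): with blocks at [0, 16 MiB) and
 * [32 MiB, 48 MiB), the candidate gaps are [16 MiB, 32 MiB) and everything
 * above 48 MiB.  A request for 8 MiB returns offset 16 MiB, because that is
 * the smallest gap that still fits (the mingap best-fit rule above). */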
2813 static ram_addr_t last_ram_offset(void)
2815 RAMBlock *block;
2816 ram_addr_t last = 0;
2818 QLIST_FOREACH(block, &ram_list.blocks, next)
2819 last = MAX(last, block->offset + block->length);
2821 return last;
2824 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2826 RAMBlock *new_block, *block;
2828 new_block = NULL;
2829 QLIST_FOREACH(block, &ram_list.blocks, next) {
2830 if (block->offset == addr) {
2831 new_block = block;
2832 break;
2835 assert(new_block);
2836 assert(!new_block->idstr[0]);
2838 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2839 char *id = dev->parent_bus->info->get_dev_path(dev);
2840 if (id) {
2841 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2842 g_free(id);
2845 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2847 QLIST_FOREACH(block, &ram_list.blocks, next) {
2848 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2849 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2850 new_block->idstr);
2851 abort();
2856 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2857 MemoryRegion *mr)
2859 RAMBlock *new_block;
2861 size = TARGET_PAGE_ALIGN(size);
2862 new_block = g_malloc0(sizeof(*new_block));
2864 new_block->mr = mr;
2865 new_block->offset = find_ram_offset(size);
2866 if (host) {
2867 new_block->host = host;
2868 new_block->flags |= RAM_PREALLOC_MASK;
2869 } else {
2870 if (mem_path) {
2871 #if defined (__linux__) && !defined(TARGET_S390X)
2872 new_block->host = file_ram_alloc(new_block, size, mem_path);
2873 if (!new_block->host) {
2874 new_block->host = qemu_vmalloc(size);
2875 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2877 #else
2878 fprintf(stderr, "-mem-path option unsupported\n");
2879 exit(1);
2880 #endif
2881 } else {
2882 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2883 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2884 a system-defined value, which is at least 256GB. Larger systems
2885 have larger values. We put the guest between the end of the data
2886 segment (system break) and this value. We use 32GB as a base to
2887 have enough room for the system break to grow. */
2888 new_block->host = mmap((void*)0x800000000, size,
2889 PROT_EXEC|PROT_READ|PROT_WRITE,
2890 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2891 if (new_block->host == MAP_FAILED) {
2892 fprintf(stderr, "Allocating RAM failed\n");
2893 abort();
2895 #else
2896 if (xen_enabled()) {
2897 xen_ram_alloc(new_block->offset, size, mr);
2898 } else {
2899 new_block->host = qemu_vmalloc(size);
2901 #endif
2902 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2905 new_block->length = size;
2907 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2909 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2910 last_ram_offset() >> TARGET_PAGE_BITS);
2911 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2912 0xff, size >> TARGET_PAGE_BITS);
2914 if (kvm_enabled())
2915 kvm_setup_guest_memory(new_block->host, size);
2917 return new_block->offset;
2920 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2922 return qemu_ram_alloc_from_ptr(size, NULL, mr);
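#if 0
/* Illustrative sketch: allocating guest RAM backing for a MemoryRegion and
 * touching it through its host mapping.  'mr' is assumed to have been set
 * up by the memory API; the 1 MiB size is arbitrary. */
static void example_alloc_ram(MemoryRegion *mr)
{
    ram_addr_t offset = qemu_ram_alloc(0x100000, mr);
    void *host = qemu_get_ram_ptr(offset);

    memset(host, 0, 0x100000);
    qemu_put_ram_ptr(host);
}
#endif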
2925 void qemu_ram_free_from_ptr(ram_addr_t addr)
2927 RAMBlock *block;
2929 QLIST_FOREACH(block, &ram_list.blocks, next) {
2930 if (addr == block->offset) {
2931 QLIST_REMOVE(block, next);
2932 g_free(block);
2933 return;
2938 void qemu_ram_free(ram_addr_t addr)
2940 RAMBlock *block;
2942 QLIST_FOREACH(block, &ram_list.blocks, next) {
2943 if (addr == block->offset) {
2944 QLIST_REMOVE(block, next);
2945 if (block->flags & RAM_PREALLOC_MASK) {
2947 } else if (mem_path) {
2948 #if defined (__linux__) && !defined(TARGET_S390X)
2949 if (block->fd) {
2950 munmap(block->host, block->length);
2951 close(block->fd);
2952 } else {
2953 qemu_vfree(block->host);
2955 #else
2956 abort();
2957 #endif
2958 } else {
2959 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2960 munmap(block->host, block->length);
2961 #else
2962 if (xen_enabled()) {
2963 xen_invalidate_map_cache_entry(block->host);
2964 } else {
2965 qemu_vfree(block->host);
2967 #endif
2969 g_free(block);
2970 return;
2976 #ifndef _WIN32
2977 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2979 RAMBlock *block;
2980 ram_addr_t offset;
2981 int flags;
2982 void *area, *vaddr;
2984 QLIST_FOREACH(block, &ram_list.blocks, next) {
2985 offset = addr - block->offset;
2986 if (offset < block->length) {
2987 vaddr = block->host + offset;
2988 if (block->flags & RAM_PREALLOC_MASK) {
2990 } else {
2991 flags = MAP_FIXED;
2992 munmap(vaddr, length);
2993 if (mem_path) {
2994 #if defined(__linux__) && !defined(TARGET_S390X)
2995 if (block->fd) {
2996 #ifdef MAP_POPULATE
2997 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2998 MAP_PRIVATE;
2999 #else
3000 flags |= MAP_PRIVATE;
3001 #endif
3002 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3003 flags, block->fd, offset);
3004 } else {
3005 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3006 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3007 flags, -1, 0);
3009 #else
3010 abort();
3011 #endif
3012 } else {
3013 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3014 flags |= MAP_SHARED | MAP_ANONYMOUS;
3015 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3016 flags, -1, 0);
3017 #else
3018 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3019 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3020 flags, -1, 0);
3021 #endif
3023 if (area != vaddr) {
3024 fprintf(stderr, "Could not remap addr: "
3025 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3026 length, addr);
3027 exit(1);
3029 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3031 return;
3035 #endif /* !_WIN32 */
3037 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3038 With the exception of the softmmu code in this file, this should
3039 only be used for local memory (e.g. video ram) that the device owns,
3040 and knows it isn't going to access beyond the end of the block.
3042 It should not be used for general purpose DMA.
3043 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3045 void *qemu_get_ram_ptr(ram_addr_t addr)
3047 RAMBlock *block;
3049 QLIST_FOREACH(block, &ram_list.blocks, next) {
3050 if (addr - block->offset < block->length) {
3051 /* Move this entry to the start of the list. */
3052 if (block != QLIST_FIRST(&ram_list.blocks)) {
3053 QLIST_REMOVE(block, next);
3054 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3056 if (xen_enabled()) {
3057 /* We need to check if the requested address is in the RAM
3058 * because we don't want to map the entire memory in QEMU.
3059 * In that case just map until the end of the page.
3061 if (block->offset == 0) {
3062 return xen_map_cache(addr, 0, 0);
3063 } else if (block->host == NULL) {
3064 block->host =
3065 xen_map_cache(block->offset, block->length, 1);
3068 return block->host + (addr - block->offset);
3072 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3073 abort();
3075 return NULL;
3078 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3079 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3081 void *qemu_safe_ram_ptr(ram_addr_t addr)
3083 RAMBlock *block;
3085 QLIST_FOREACH(block, &ram_list.blocks, next) {
3086 if (addr - block->offset < block->length) {
3087 if (xen_enabled()) {
3088 /* We need to check if the requested address is in the RAM
3089 * because we don't want to map the entire memory in QEMU.
3090 * In that case just map until the end of the page.
3092 if (block->offset == 0) {
3093 return xen_map_cache(addr, 0, 0);
3094 } else if (block->host == NULL) {
3095 block->host =
3096 xen_map_cache(block->offset, block->length, 1);
3099 return block->host + (addr - block->offset);
3103 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3104 abort();
3106 return NULL;
3109 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3110 * but takes a size argument */
3111 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3113 if (*size == 0) {
3114 return NULL;
3116 if (xen_enabled()) {
3117 return xen_map_cache(addr, *size, 1);
3118 } else {
3119 RAMBlock *block;
3121 QLIST_FOREACH(block, &ram_list.blocks, next) {
3122 if (addr - block->offset < block->length) {
3123 if (addr - block->offset + *size > block->length)
3124 *size = block->length - addr + block->offset;
3125 return block->host + (addr - block->offset);
3129 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3130 abort();
3134 void qemu_put_ram_ptr(void *addr)
3136 trace_qemu_put_ram_ptr(addr);
3139 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3141 RAMBlock *block;
3142 uint8_t *host = ptr;
3144 if (xen_enabled()) {
3145 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3146 return 0;
3149 QLIST_FOREACH(block, &ram_list.blocks, next) {
3150 /* This case happens when the block is not mapped. */
3151 if (block->host == NULL) {
3152 continue;
3154 if (host - block->host < block->length) {
3155 *ram_addr = block->offset + (host - block->host);
3156 return 0;
3160 return -1;
3163 /* Some of the softmmu routines need to translate from a host pointer
3164 (typically a TLB entry) back to a ram offset. */
3165 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3167 ram_addr_t ram_addr;
3169 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3170 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3171 abort();
3173 return ram_addr;
3176 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3177 unsigned size)
3179 #ifdef DEBUG_UNASSIGNED
3180 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3181 #endif
3182 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3183 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3184 #endif
3185 return 0;
3188 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3189 uint64_t val, unsigned size)
3191 #ifdef DEBUG_UNASSIGNED
3192 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3193 #endif
3194 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3195 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3196 #endif
3199 static const MemoryRegionOps unassigned_mem_ops = {
3200 .read = unassigned_mem_read,
3201 .write = unassigned_mem_write,
3202 .endianness = DEVICE_NATIVE_ENDIAN,
3205 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3206 unsigned size)
3208 abort();
3211 static void error_mem_write(void *opaque, target_phys_addr_t addr,
3212 uint64_t value, unsigned size)
3214 abort();
3217 static const MemoryRegionOps error_mem_ops = {
3218 .read = error_mem_read,
3219 .write = error_mem_write,
3220 .endianness = DEVICE_NATIVE_ENDIAN,
3223 static const MemoryRegionOps rom_mem_ops = {
3224 .read = error_mem_read,
3225 .write = unassigned_mem_write,
3226 .endianness = DEVICE_NATIVE_ENDIAN,
3229 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3230 uint64_t val, unsigned size)
3232 int dirty_flags;
3233 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3234 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3235 #if !defined(CONFIG_USER_ONLY)
3236 tb_invalidate_phys_page_fast(ram_addr, size);
3237 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3238 #endif
3240 switch (size) {
3241 case 1:
3242 stb_p(qemu_get_ram_ptr(ram_addr), val);
3243 break;
3244 case 2:
3245 stw_p(qemu_get_ram_ptr(ram_addr), val);
3246 break;
3247 case 4:
3248 stl_p(qemu_get_ram_ptr(ram_addr), val);
3249 break;
3250 default:
3251 abort();
3253 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3254 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3255 /* we remove the notdirty callback only if the code has been
3256 flushed */
3257 if (dirty_flags == 0xff)
3258 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3261 static const MemoryRegionOps notdirty_mem_ops = {
3262 .read = error_mem_read,
3263 .write = notdirty_mem_write,
3264 .endianness = DEVICE_NATIVE_ENDIAN,
3267 /* Generate a debug exception if a watchpoint has been hit. */
3268 static void check_watchpoint(int offset, int len_mask, int flags)
3270 CPUState *env = cpu_single_env;
3271 target_ulong pc, cs_base;
3272 TranslationBlock *tb;
3273 target_ulong vaddr;
3274 CPUWatchpoint *wp;
3275 int cpu_flags;
3277 if (env->watchpoint_hit) {
3278 /* We re-entered the check after replacing the TB. Now raise
3279 * the debug interrupt so that it will trigger after the
3280 * current instruction. */
3281 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3282 return;
3284 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3285 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3286 if ((vaddr == (wp->vaddr & len_mask) ||
3287 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3288 wp->flags |= BP_WATCHPOINT_HIT;
3289 if (!env->watchpoint_hit) {
3290 env->watchpoint_hit = wp;
3291 tb = tb_find_pc(env->mem_io_pc);
3292 if (!tb) {
3293 cpu_abort(env, "check_watchpoint: could not find TB for "
3294 "pc=%p", (void *)env->mem_io_pc);
3296 cpu_restore_state(tb, env, env->mem_io_pc);
3297 tb_phys_invalidate(tb, -1);
3298 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3299 env->exception_index = EXCP_DEBUG;
3300 } else {
3301 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3302 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3304 cpu_resume_from_signal(env, NULL);
3306 } else {
3307 wp->flags &= ~BP_WATCHPOINT_HIT;
3312 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3313 so these check for a hit then pass through to the normal out-of-line
3314 phys routines. */
3315 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3316 unsigned size)
3318 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3319 switch (size) {
3320 case 1: return ldub_phys(addr);
3321 case 2: return lduw_phys(addr);
3322 case 4: return ldl_phys(addr);
3323 default: abort();
3327 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3328 uint64_t val, unsigned size)
3330 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3331 switch (size) {
3332 case 1: stb_phys(addr, val); break;
3333 case 2: stw_phys(addr, val); break;
3334 case 4: stl_phys(addr, val); break;
3335 default: abort();
3339 static const MemoryRegionOps watch_mem_ops = {
3340 .read = watch_mem_read,
3341 .write = watch_mem_write,
3342 .endianness = DEVICE_NATIVE_ENDIAN,
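#if 0
/* Illustrative sketch: a 4-byte write watchpoint as the gdbstub would
 * request it (cpu_watchpoint_insert() is defined earlier in this file).
 * Once inserted, tlb_set_page() marks the page TLB_MMIO so accesses are
 * routed through the watch_mem handlers above. */
static void example_insert_watchpoint(CPUState *env, target_ulong addr)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, &wp) < 0) {
        /* invalid length/alignment or other failure */
    }
}
#endif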
3345 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3346 unsigned len)
3348 subpage_t *mmio = opaque;
3349 unsigned int idx = SUBPAGE_IDX(addr);
3350 MemoryRegionSection *section;
3351 #if defined(DEBUG_SUBPAGE)
3352 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3353 mmio, len, addr, idx);
3354 #endif
3356 section = &phys_sections[mmio->sub_section[idx]];
3357 addr += mmio->base;
3358 addr -= section->offset_within_address_space;
3359 addr += section->offset_within_region;
3360 return io_mem_read(section->mr->ram_addr, addr, len);
3363 static void subpage_write(void *opaque, target_phys_addr_t addr,
3364 uint64_t value, unsigned len)
3366 subpage_t *mmio = opaque;
3367 unsigned int idx = SUBPAGE_IDX(addr);
3368 MemoryRegionSection *section;
3369 #if defined(DEBUG_SUBPAGE)
3370 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3371 " idx %d value %"PRIx64"\n",
3372 __func__, mmio, len, addr, idx, value);
3373 #endif
3375 section = &phys_sections[mmio->sub_section[idx]];
3376 addr += mmio->base;
3377 addr -= section->offset_within_address_space;
3378 addr += section->offset_within_region;
3379 io_mem_write(section->mr->ram_addr, addr, value, len);
3382 static const MemoryRegionOps subpage_ops = {
3383 .read = subpage_read,
3384 .write = subpage_write,
3385 .endianness = DEVICE_NATIVE_ENDIAN,
3388 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3389 unsigned size)
3391 ram_addr_t raddr = addr;
3392 void *ptr = qemu_get_ram_ptr(raddr);
3393 switch (size) {
3394 case 1: return ldub_p(ptr);
3395 case 2: return lduw_p(ptr);
3396 case 4: return ldl_p(ptr);
3397 default: abort();
3401 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3402 uint64_t value, unsigned size)
3404 ram_addr_t raddr = addr;
3405 void *ptr = qemu_get_ram_ptr(raddr);
3406 switch (size) {
3407 case 1: return stb_p(ptr, value);
3408 case 2: return stw_p(ptr, value);
3409 case 4: return stl_p(ptr, value);
3410 default: abort();
3414 static const MemoryRegionOps subpage_ram_ops = {
3415 .read = subpage_ram_read,
3416 .write = subpage_ram_write,
3417 .endianness = DEVICE_NATIVE_ENDIAN,
3420 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3421 uint16_t section)
3423 int idx, eidx;
3425 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3426 return -1;
3427 idx = SUBPAGE_IDX(start);
3428 eidx = SUBPAGE_IDX(end);
3429 #if defined(DEBUG_SUBPAGE)
3430 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3431 mmio, start, end, idx, eidx, memory);
3432 #endif
3433 if (memory_region_is_ram(phys_sections[section].mr)) {
3434 MemoryRegionSection new_section = phys_sections[section];
3435 new_section.mr = &io_mem_subpage_ram;
3436 section = phys_section_add(&new_section);
3438 for (; idx <= eidx; idx++) {
3439 mmio->sub_section[idx] = section;
3442 return 0;
3445 static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section_ind,
3446 uint16_t orig_section)
3448 subpage_t *mmio;
3449 MemoryRegionSection section = {
3450 .offset_within_address_space = base,
3451 .size = TARGET_PAGE_SIZE,
3454 mmio = g_malloc0(sizeof(subpage_t));
3456 mmio->base = base;
3457 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3458 "subpage", TARGET_PAGE_SIZE);
3459 mmio->iomem.subpage = true;
3460 section.mr = &mmio->iomem;
3461 #if defined(DEBUG_SUBPAGE)
3462 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3463 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3464 #endif
3465 *section_ind = phys_section_add(&section);
3466 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_section);
3468 return mmio;
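/* Example of the resulting layout: if a device maps only the first 0x100
 * bytes of an otherwise RAM-backed page, the page gets one subpage_t whose
 * sub_section[] entries 0x000..0x0ff point at the device section while
 * 0x100..0xfff keep pointing at the (subpage-ram) RAM section. */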
3471 static int get_free_io_mem_idx(void)
3473 int i;
3475 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3476 if (!io_mem_used[i]) {
3477 io_mem_used[i] = 1;
3478 return i;
3480 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3481 return -1;
3484 /* Associate a MemoryRegion with an io_mem slot so that its callbacks
3485 can be dispatched through io_mem_read() and io_mem_write() using a
3486 small integer index.
3487 If io_index is non zero, the corresponding io zone is
3488 modified. If it is zero, a new io zone is allocated. The returned
3489 index identifies the io zone in the physical page map. (-1) is
3490 returned on error. */
3491 static int cpu_register_io_memory_fixed(int io_index, MemoryRegion *mr)
3493 if (io_index <= 0) {
3494 io_index = get_free_io_mem_idx();
3495 if (io_index == -1)
3496 return io_index;
3497 } else {
3498 if (io_index >= IO_MEM_NB_ENTRIES)
3499 return -1;
3502 io_mem_region[io_index] = mr;
3504 return io_index;
3507 int cpu_register_io_memory(MemoryRegion *mr)
3509 return cpu_register_io_memory_fixed(0, mr);
3512 void cpu_unregister_io_memory(int io_index)
3514 io_mem_region[io_index] = NULL;
3515 io_mem_used[io_index] = 0;
3518 static uint16_t dummy_section(MemoryRegion *mr)
3520 MemoryRegionSection section = {
3521 .mr = mr,
3522 .offset_within_address_space = 0,
3523 .offset_within_region = 0,
3524 .size = UINT64_MAX,
3527 return phys_section_add(&section);
3530 static void io_mem_init(void)
3532 int i;
3534 /* Must be first: */
3535 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3536 assert(io_mem_ram.ram_addr == 0);
3537 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3538 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3539 "unassigned", UINT64_MAX);
3540 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3541 "notdirty", UINT64_MAX);
3542 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3543 "subpage-ram", UINT64_MAX);
3544 for (i = 0; i < 5; i++) /* reserve slots for the five fixed regions above */
3545 io_mem_used[i] = 1;
3547 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3548 "watch", UINT64_MAX);
3551 static void core_begin(MemoryListener *listener)
3553 destroy_all_mappings();
3554 phys_sections_clear();
3555 phys_map.u.node = PHYS_MAP_NODE_NIL;
3556 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3559 static void core_commit(MemoryListener *listener)
3561 CPUState *env;
3563 /* since each CPU stores ram addresses in its TLB cache, we must
3564 reset the modified entries */
3565 /* XXX: slow ! */
3566 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3567 tlb_flush(env, 1);
3571 static void core_region_add(MemoryListener *listener,
3572 MemoryRegionSection *section)
3574 cpu_register_physical_memory_log(section, section->readonly);
3577 static void core_region_del(MemoryListener *listener,
3578 MemoryRegionSection *section)
3582 static void core_region_nop(MemoryListener *listener,
3583 MemoryRegionSection *section)
3585 cpu_register_physical_memory_log(section, section->readonly);
3588 static void core_log_start(MemoryListener *listener,
3589 MemoryRegionSection *section)
3593 static void core_log_stop(MemoryListener *listener,
3594 MemoryRegionSection *section)
3598 static void core_log_sync(MemoryListener *listener,
3599 MemoryRegionSection *section)
3603 static void core_log_global_start(MemoryListener *listener)
3605 cpu_physical_memory_set_dirty_tracking(1);
3608 static void core_log_global_stop(MemoryListener *listener)
3610 cpu_physical_memory_set_dirty_tracking(0);
3613 static void core_eventfd_add(MemoryListener *listener,
3614 MemoryRegionSection *section,
3615 bool match_data, uint64_t data, int fd)
3619 static void core_eventfd_del(MemoryListener *listener,
3620 MemoryRegionSection *section,
3621 bool match_data, uint64_t data, int fd)
3625 static void io_begin(MemoryListener *listener)
3629 static void io_commit(MemoryListener *listener)
3633 static void io_region_add(MemoryListener *listener,
3634 MemoryRegionSection *section)
3636 iorange_init(&section->mr->iorange, &memory_region_iorange_ops,
3637 section->offset_within_address_space, section->size);
3638 ioport_register(&section->mr->iorange);
3641 static void io_region_del(MemoryListener *listener,
3642 MemoryRegionSection *section)
3644 isa_unassign_ioport(section->offset_within_address_space, section->size);
3647 static void io_region_nop(MemoryListener *listener,
3648 MemoryRegionSection *section)
3652 static void io_log_start(MemoryListener *listener,
3653 MemoryRegionSection *section)
3657 static void io_log_stop(MemoryListener *listener,
3658 MemoryRegionSection *section)
3662 static void io_log_sync(MemoryListener *listener,
3663 MemoryRegionSection *section)
3667 static void io_log_global_start(MemoryListener *listener)
3671 static void io_log_global_stop(MemoryListener *listener)
3675 static void io_eventfd_add(MemoryListener *listener,
3676 MemoryRegionSection *section,
3677 bool match_data, uint64_t data, int fd)
3681 static void io_eventfd_del(MemoryListener *listener,
3682 MemoryRegionSection *section,
3683 bool match_data, uint64_t data, int fd)
3687 static MemoryListener core_memory_listener = {
3688 .begin = core_begin,
3689 .commit = core_commit,
3690 .region_add = core_region_add,
3691 .region_del = core_region_del,
3692 .region_nop = core_region_nop,
3693 .log_start = core_log_start,
3694 .log_stop = core_log_stop,
3695 .log_sync = core_log_sync,
3696 .log_global_start = core_log_global_start,
3697 .log_global_stop = core_log_global_stop,
3698 .eventfd_add = core_eventfd_add,
3699 .eventfd_del = core_eventfd_del,
3700 .priority = 0,
3703 static MemoryListener io_memory_listener = {
3704 .begin = io_begin,
3705 .commit = io_commit,
3706 .region_add = io_region_add,
3707 .region_del = io_region_del,
3708 .region_nop = io_region_nop,
3709 .log_start = io_log_start,
3710 .log_stop = io_log_stop,
3711 .log_sync = io_log_sync,
3712 .log_global_start = io_log_global_start,
3713 .log_global_stop = io_log_global_stop,
3714 .eventfd_add = io_eventfd_add,
3715 .eventfd_del = io_eventfd_del,
3716 .priority = 0,
3719 static void memory_map_init(void)
3721 system_memory = g_malloc(sizeof(*system_memory));
3722 memory_region_init(system_memory, "system", INT64_MAX);
3723 set_system_memory_map(system_memory);
3725 system_io = g_malloc(sizeof(*system_io));
3726 memory_region_init(system_io, "io", 65536);
3727 set_system_io_map(system_io);
3729 memory_listener_register(&core_memory_listener, system_memory);
3730 memory_listener_register(&io_memory_listener, system_io);
3733 MemoryRegion *get_system_memory(void)
3735 return system_memory;
3738 MemoryRegion *get_system_io(void)
3740 return system_io;
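#if 0
/* Illustrative sketch: how a board or device typically hooks an MMIO
 * region into the address space managed by the listeners above.
 * example_ops, example_state and the 0x10000000 base address are
 * hypothetical; memory_region_add_subregion() is declared in memory.h. */
static MemoryRegion example_mmio;

static void example_map_device(void)
{
    memory_region_init_io(&example_mmio, &example_ops, &example_state,
                          "example-mmio", 0x1000);
    memory_region_add_subregion(get_system_memory(), 0x10000000,
                                &example_mmio);
}
#endif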
3743 #endif /* !defined(CONFIG_USER_ONLY) */
3745 /* physical memory access (slow version, mainly for debug) */
3746 #if defined(CONFIG_USER_ONLY)
3747 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3748 uint8_t *buf, int len, int is_write)
3750 int l, flags;
3751 target_ulong page;
3752 void * p;
3754 while (len > 0) {
3755 page = addr & TARGET_PAGE_MASK;
3756 l = (page + TARGET_PAGE_SIZE) - addr;
3757 if (l > len)
3758 l = len;
3759 flags = page_get_flags(page);
3760 if (!(flags & PAGE_VALID))
3761 return -1;
3762 if (is_write) {
3763 if (!(flags & PAGE_WRITE))
3764 return -1;
3765 /* XXX: this code should not depend on lock_user */
3766 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3767 return -1;
3768 memcpy(p, buf, l);
3769 unlock_user(p, addr, l);
3770 } else {
3771 if (!(flags & PAGE_READ))
3772 return -1;
3773 /* XXX: this code should not depend on lock_user */
3774 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3775 return -1;
3776 memcpy(buf, p, l);
3777 unlock_user(p, addr, 0);
3779 len -= l;
3780 buf += l;
3781 addr += l;
3783 return 0;
3786 #else
3787 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3788 int len, int is_write)
3790 int l, io_index;
3791 uint8_t *ptr;
3792 uint32_t val;
3793 target_phys_addr_t page;
3794 MemoryRegionSection section;
3796 while (len > 0) {
3797 page = addr & TARGET_PAGE_MASK;
3798 l = (page + TARGET_PAGE_SIZE) - addr;
3799 if (l > len)
3800 l = len;
3801 section = phys_page_find(page >> TARGET_PAGE_BITS);
3803 if (is_write) {
3804 if (!memory_region_is_ram(section.mr)) {
3805 target_phys_addr_t addr1;
3806 io_index = memory_region_get_ram_addr(section.mr)
3807 & (IO_MEM_NB_ENTRIES - 1);
3808 addr1 = (addr & ~TARGET_PAGE_MASK)
3809 + section.offset_within_region;
3810 /* XXX: could force cpu_single_env to NULL to avoid
3811 potential bugs */
3812 if (l >= 4 && ((addr1 & 3) == 0)) {
3813 /* 32 bit write access */
3814 val = ldl_p(buf);
3815 io_mem_write(io_index, addr1, val, 4);
3816 l = 4;
3817 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3818 /* 16 bit write access */
3819 val = lduw_p(buf);
3820 io_mem_write(io_index, addr1, val, 2);
3821 l = 2;
3822 } else {
3823 /* 8 bit write access */
3824 val = ldub_p(buf);
3825 io_mem_write(io_index, addr1, val, 1);
3826 l = 1;
3828 } else if (!section.readonly) {
3829 ram_addr_t addr1;
3830 addr1 = (memory_region_get_ram_addr(section.mr)
3831 + section.offset_within_region)
3832 | (addr & ~TARGET_PAGE_MASK);
3833 /* RAM case */
3834 ptr = qemu_get_ram_ptr(addr1);
3835 memcpy(ptr, buf, l);
3836 if (!cpu_physical_memory_is_dirty(addr1)) {
3837 /* invalidate code */
3838 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3839 /* set dirty bit */
3840 cpu_physical_memory_set_dirty_flags(
3841 addr1, (0xff & ~CODE_DIRTY_FLAG));
3843 qemu_put_ram_ptr(ptr);
3845 } else {
3846 if (!is_ram_rom_romd(&section)) {
3847 target_phys_addr_t addr1;
3848 /* I/O case */
3849 io_index = memory_region_get_ram_addr(section.mr)
3850 & (IO_MEM_NB_ENTRIES - 1);
3851 addr1 = (addr & ~TARGET_PAGE_MASK)
3852 + section.offset_within_region;
3853 if (l >= 4 && ((addr1 & 3) == 0)) {
3854 /* 32 bit read access */
3855 val = io_mem_read(io_index, addr1, 4);
3856 stl_p(buf, val);
3857 l = 4;
3858 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3859 /* 16 bit read access */
3860 val = io_mem_read(io_index, addr1, 2);
3861 stw_p(buf, val);
3862 l = 2;
3863 } else {
3864 /* 8 bit read access */
3865 val = io_mem_read(io_index, addr1, 1);
3866 stb_p(buf, val);
3867 l = 1;
3869 } else {
3870 /* RAM case */
3871 ptr = qemu_get_ram_ptr(section.mr->ram_addr
3872 + section.offset_within_region);
3873 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3874 qemu_put_ram_ptr(ptr);
3877 len -= l;
3878 buf += l;
3879 addr += l;
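#if 0
/* Illustrative sketch: most device code uses the cpu_physical_memory_read()
 * and cpu_physical_memory_write() wrappers (cpu-common.h) rather than
 * calling cpu_physical_memory_rw() directly.  The descriptor address
 * 0x1000 is hypothetical. */
static void example_update_descriptor(void)
{
    uint8_t desc[16];

    cpu_physical_memory_read(0x1000, desc, sizeof(desc));
    desc[0] |= 1;                       /* e.g. set an ownership bit */
    cpu_physical_memory_write(0x1000, desc, sizeof(desc));
}
#endif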
3883 /* used for ROM loading : can write in RAM and ROM */
3884 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3885 const uint8_t *buf, int len)
3887 int l;
3888 uint8_t *ptr;
3889 target_phys_addr_t page;
3890 MemoryRegionSection section;
3892 while (len > 0) {
3893 page = addr & TARGET_PAGE_MASK;
3894 l = (page + TARGET_PAGE_SIZE) - addr;
3895 if (l > len)
3896 l = len;
3897 section = phys_page_find(page >> TARGET_PAGE_BITS);
3899 if (!is_ram_rom_romd(&section)) {
3900 /* do nothing */
3901 } else {
3902 unsigned long addr1;
3903 addr1 = (memory_region_get_ram_addr(section.mr)
3904 + section.offset_within_region)
3905 + (addr & ~TARGET_PAGE_MASK);
3906 /* ROM/RAM case */
3907 ptr = qemu_get_ram_ptr(addr1);
3908 memcpy(ptr, buf, l);
3909 qemu_put_ram_ptr(ptr);
3911 len -= l;
3912 buf += l;
3913 addr += l;
3917 typedef struct {
3918 void *buffer;
3919 target_phys_addr_t addr;
3920 target_phys_addr_t len;
3921 } BounceBuffer;
3923 static BounceBuffer bounce;
3925 typedef struct MapClient {
3926 void *opaque;
3927 void (*callback)(void *opaque);
3928 QLIST_ENTRY(MapClient) link;
3929 } MapClient;
3931 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3932 = QLIST_HEAD_INITIALIZER(map_client_list);
3934 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3936 MapClient *client = g_malloc(sizeof(*client));
3938 client->opaque = opaque;
3939 client->callback = callback;
3940 QLIST_INSERT_HEAD(&map_client_list, client, link);
3941 return client;
3944 void cpu_unregister_map_client(void *_client)
3946 MapClient *client = (MapClient *)_client;
3948 QLIST_REMOVE(client, link);
3949 g_free(client);
3952 static void cpu_notify_map_clients(void)
3954 MapClient *client;
3956 while (!QLIST_EMPTY(&map_client_list)) {
3957 client = QLIST_FIRST(&map_client_list);
3958 client->callback(client->opaque);
3959 cpu_unregister_map_client(client);
3963 /* Map a physical memory region into a host virtual address.
3964 * May map a subset of the requested range, given by and returned in *plen.
3965 * May return NULL if resources needed to perform the mapping are exhausted.
3966 * Use only for reads OR writes - not for read-modify-write operations.
3967 * Use cpu_register_map_client() to know when retrying the map operation is
3968 * likely to succeed.
3970 void *cpu_physical_memory_map(target_phys_addr_t addr,
3971 target_phys_addr_t *plen,
3972 int is_write)
3974 target_phys_addr_t len = *plen;
3975 target_phys_addr_t todo = 0;
3976 int l;
3977 target_phys_addr_t page;
3978 MemoryRegionSection section;
3979 ram_addr_t raddr = RAM_ADDR_MAX;
3980 ram_addr_t rlen;
3981 void *ret;
3983 while (len > 0) {
3984 page = addr & TARGET_PAGE_MASK;
3985 l = (page + TARGET_PAGE_SIZE) - addr;
3986 if (l > len)
3987 l = len;
3988 section = phys_page_find(page >> TARGET_PAGE_BITS);
3990 if (!(memory_region_is_ram(section.mr) && !section.readonly)) {
3991 if (todo || bounce.buffer) {
3992 break;
3994 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3995 bounce.addr = addr;
3996 bounce.len = l;
3997 if (!is_write) {
3998 cpu_physical_memory_read(addr, bounce.buffer, l);
4001 *plen = l;
4002 return bounce.buffer;
4004 if (!todo) {
4005 raddr = memory_region_get_ram_addr(section.mr)
4006 + section.offset_within_region
4007 + (addr & ~TARGET_PAGE_MASK);
4010 len -= l;
4011 addr += l;
4012 todo += l;
4014 rlen = todo;
4015 ret = qemu_ram_ptr_length(raddr, &rlen);
4016 *plen = rlen;
4017 return ret;
4020 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4021 * Will also mark the memory as dirty if is_write == 1. access_len gives
4022 * the amount of memory that was actually read or written by the caller.
4024 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4025 int is_write, target_phys_addr_t access_len)
4027 if (buffer != bounce.buffer) {
4028 if (is_write) {
4029 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4030 while (access_len) {
4031 unsigned l;
4032 l = TARGET_PAGE_SIZE;
4033 if (l > access_len)
4034 l = access_len;
4035 if (!cpu_physical_memory_is_dirty(addr1)) {
4036 /* invalidate code */
4037 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4038 /* set dirty bit */
4039 cpu_physical_memory_set_dirty_flags(
4040 addr1, (0xff & ~CODE_DIRTY_FLAG));
4042 addr1 += l;
4043 access_len -= l;
4046 if (xen_enabled()) {
4047 xen_invalidate_map_cache_entry(buffer);
4049 return;
4051 if (is_write) {
4052 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4054 qemu_vfree(bounce.buffer);
4055 bounce.buffer = NULL;
4056 cpu_notify_map_clients();
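#if 0
/* Illustrative sketch of the intended map/unmap pairing for zero-copy
 * DMA-style access; dma_addr and dma_len are hypothetical. */
static void example_dma_write(target_phys_addr_t dma_addr,
                              target_phys_addr_t dma_len)
{
    target_phys_addr_t plen = dma_len;
    void *host = cpu_physical_memory_map(dma_addr, &plen, 1 /* is_write */);

    if (!host) {
        /* Resources (the single bounce buffer) are exhausted: register a
           map client with cpu_register_map_client() and retry later. */
        return;
    }
    memset(host, 0, plen);              /* device fills host[0..plen) */
    cpu_physical_memory_unmap(host, plen, 1, plen);
}
#endif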
4059 /* warning: addr must be aligned */
4060 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4061 enum device_endian endian)
4063 int io_index;
4064 uint8_t *ptr;
4065 uint32_t val;
4066 MemoryRegionSection section;
4068 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4070 if (!is_ram_rom_romd(&section)) {
4071 /* I/O case */
4072 io_index = memory_region_get_ram_addr(section.mr)
4073 & (IO_MEM_NB_ENTRIES - 1);
4074 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4075 val = io_mem_read(io_index, addr, 4);
4076 #if defined(TARGET_WORDS_BIGENDIAN)
4077 if (endian == DEVICE_LITTLE_ENDIAN) {
4078 val = bswap32(val);
4080 #else
4081 if (endian == DEVICE_BIG_ENDIAN) {
4082 val = bswap32(val);
4084 #endif
4085 } else {
4086 /* RAM case */
4087 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4088 & TARGET_PAGE_MASK)
4089 + section.offset_within_region) +
4090 (addr & ~TARGET_PAGE_MASK);
4091 switch (endian) {
4092 case DEVICE_LITTLE_ENDIAN:
4093 val = ldl_le_p(ptr);
4094 break;
4095 case DEVICE_BIG_ENDIAN:
4096 val = ldl_be_p(ptr);
4097 break;
4098 default:
4099 val = ldl_p(ptr);
4100 break;
4103 return val;
4106 uint32_t ldl_phys(target_phys_addr_t addr)
4108 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4111 uint32_t ldl_le_phys(target_phys_addr_t addr)
4113 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4116 uint32_t ldl_be_phys(target_phys_addr_t addr)
4118 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
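#if 0
/* Illustrative sketch: reading a 32-bit little-endian device register at a
 * hypothetical physical address, independent of the target's endianness. */
static uint32_t example_read_id_register(void)
{
    return ldl_le_phys(0xfe000000);
}
#endif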
4121 /* warning: addr must be aligned */
4122 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4123 enum device_endian endian)
4125 int io_index;
4126 uint8_t *ptr;
4127 uint64_t val;
4128 MemoryRegionSection section;
4130 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4132 if (!is_ram_rom_romd(&section)) {
4133 /* I/O case */
4134 io_index = memory_region_get_ram_addr(section.mr)
4135 & (IO_MEM_NB_ENTRIES - 1);
4136 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4138 /* XXX This is broken when device endian != cpu endian.
4139 Fix and add "endian" variable check */
4140 #ifdef TARGET_WORDS_BIGENDIAN
4141 val = io_mem_read(io_index, addr, 4) << 32;
4142 val |= io_mem_read(io_index, addr + 4, 4);
4143 #else
4144 val = io_mem_read(io_index, addr, 4);
4145 val |= io_mem_read(io_index, addr + 4, 4) << 32;
4146 #endif
4147 } else {
4148 /* RAM case */
4149 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4150 & TARGET_PAGE_MASK)
4151 + section.offset_within_region)
4152 + (addr & ~TARGET_PAGE_MASK);
4153 switch (endian) {
4154 case DEVICE_LITTLE_ENDIAN:
4155 val = ldq_le_p(ptr);
4156 break;
4157 case DEVICE_BIG_ENDIAN:
4158 val = ldq_be_p(ptr);
4159 break;
4160 default:
4161 val = ldq_p(ptr);
4162 break;
4165 return val;
4168 uint64_t ldq_phys(target_phys_addr_t addr)
4170 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4173 uint64_t ldq_le_phys(target_phys_addr_t addr)
4175 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4178 uint64_t ldq_be_phys(target_phys_addr_t addr)
4180 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4183 /* XXX: optimize */
4184 uint32_t ldub_phys(target_phys_addr_t addr)
4186 uint8_t val;
4187 cpu_physical_memory_read(addr, &val, 1);
4188 return val;
/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!is_ram_rom_romd(&section)) {
        /* I/O case */
        io_index = memory_region_get_ram_addr(section.mr)
            & (IO_MEM_NB_ENTRIES - 1);
        addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
        val = io_mem_read(io_index, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
                                & TARGET_PAGE_MASK)
                               + section.offset_within_region)
            + (addr & ~TARGET_PAGE_MASK);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}
uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* warning: addr must be aligned. The RAM page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section.mr) || section.readonly) {
        if (memory_region_is_ram(section.mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section.mr);
        }
        addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
        io_mem_write(io_index, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section.mr)
                               & TARGET_PAGE_MASK)
            + section.offset_within_region
            + (addr & ~TARGET_PAGE_MASK);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}
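/* Illustrative sketch (not part of the original file): target MMU helpers
 * that set accessed/dirty bits in a guest page table entry typically want
 * the "notdirty" store, so that bookkeeping writes do not disturb the
 * CODE_DIRTY tracking for the RAM page holding the page table, e.g.:
 *
 *     pte |= PG_ACCESSED_MASK;          // x86-style accessed flag, illustrative
 *     stl_phys_notdirty(pte_addr, pte); // update PTE without dirtying the page
 *
 * pte and pte_addr are placeholders, not identifiers from this file.
 */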
void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section.mr) || section.readonly) {
        if (memory_region_is_ram(section.mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section.mr)
                & (IO_MEM_NB_ENTRIES - 1);
        }
        addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(io_index, addr, val >> 32, 4);
        io_mem_write(io_index, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(io_index, addr, (uint32_t)val, 4);
        io_mem_write(io_index, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
                                & TARGET_PAGE_MASK)
                               + section.offset_within_region)
            + (addr & ~TARGET_PAGE_MASK);
        stq_p(ptr, val);
    }
}
/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section.mr) || section.readonly) {
        if (memory_region_is_ram(section.mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section.mr)
                & (IO_MEM_NB_ENTRIES - 1);
        }
        addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(io_index, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
            + section.offset_within_region
            + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}
void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}
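/* Illustrative note (not part of the original file): unlike
 * stl_phys_notdirty() above, these store helpers go through
 * stl_phys_internal(), which invalidates any translated code on the target
 * RAM page and updates the dirty flags, e.g.:
 *
 *     stl_le_phys(ring_addr, idx);   // safe even if the page held TB code
 *
 * ring_addr and idx are placeholders for a device-model use such as a
 * ring-index update.
 */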
/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    MemoryRegionSection section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section.mr) || section.readonly) {
        if (memory_region_is_ram(section.mr)) {
            io_index = io_mem_rom.ram_addr;
        } else {
            io_index = memory_region_get_ram_addr(section.mr)
                & (IO_MEM_NB_ENTRIES - 1);
        }
        addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(io_index, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
            + section.offset_within_region + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}
void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
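/* Illustrative note (not part of the original file): the 64-bit store
 * variants differ only in the byte swap applied before the buffer write,
 * so on any host
 *
 *     stq_le_phys(a, 0x0102030405060708ULL);
 *
 * lands in guest memory as 08 07 06 05 04 03 02 01, stq_be_phys() stores the
 * bytes in the opposite order, and stq_phys() follows the target's native
 * byte order via tswap64(). The address a is a placeholder.
 */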
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif
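/* Illustrative sketch (not part of the original file): a debugger stub can
 * use cpu_memory_rw_debug() to read guest virtual memory and to patch
 * breakpoints even into ROM-backed pages, e.g.:
 *
 *     uint8_t insn[4];
 *     if (cpu_memory_rw_debug(env, pc, insn, sizeof(insn), 0) < 0) {
 *         return -1;                                  // page not mapped
 *     }
 *     cpu_memory_rw_debug(env, pc, bkpt, sizeof(bkpt), 1);
 *
 * bkpt is a placeholder for a target-specific breakpoint encoding.
 */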
/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUState *env, void *retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc((unsigned long)retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, (unsigned long)retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}
#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for (i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ?
                (double) (code_gen_ptr - code_gen_buffer) / target_code_size
                : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}
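/* Illustrative note (not part of the original file): dump_exec_info() only
 * writes through the fprintf-style callback it is given, so a caller can
 * route the report anywhere, e.g. to stderr:
 *
 *     dump_exec_info(stderr, fprintf);
 *
 * In QEMU proper it is typically driven from the monitor with a
 * monitor-specific fprintf_function.
 */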
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
   is the offset relative to phys_ram_base */
tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
{
    int mmu_idx, page_index, pd;
    void *p;

    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    mmu_idx = cpu_mmu_index(env1);
    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                 (addr & TARGET_PAGE_MASK))) {
        ldub_code(addr);
    }
    pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
    if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
        && !io_mem_region[pd]->rom_device) {
#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
        cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
#else
        cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
                  TARGET_FMT_lx "\n", addr);
#endif
    }
    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
    return qemu_ram_addr_from_host_nofail(p);
}
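/* Illustrative sketch (not part of the original file): the TB lookup path
 * wants the RAM-offset page of the code it is about to translate, roughly:
 *
 *     tb_page_addr_t phys_pc = get_page_addr_code(env, pc);
 *     unsigned int h = tb_phys_hash_func(phys_pc);
 *
 * and then walks the tb_phys_hash[h] chain. As NOTE2 above says, the value
 * is an offset into guest RAM, not a full physical address.
 */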
/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
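/* Illustrative sketch (not part of the original file): the virtio device
 * model is expected to consult this helper when it has to byte-swap ring
 * data for a big-endian target, roughly along the lines of:
 *
 *     if (virtio_is_big_endian()) {
 *         val = bswap16(val);            // guest ring data is big endian
 *     }
 *
 * The actual call sites live in the virtio code, not in this file.
 */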
#define MMUSUFFIX _cmmu
#undef GETPC
#define GETPC() NULL
#define env cpu_single_env
#define SOFTMMU_CODE_ACCESS

#define SHIFT 0
#include "softmmu_template.h"

#define SHIFT 1
#include "softmmu_template.h"

#define SHIFT 2
#include "softmmu_template.h"

#define SHIFT 3
#include "softmmu_template.h"

#undef env

#endif