kvm: make hyperv vapic assist page migratable
[qemu/ar7.git] / exec.c
blob2435d9ecd9bf987da21001a6ce9b322d2ed1dde4
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
52 #include "exec/memory-internal.h"
53 #include "exec/ram_addr.h"
54 #include "qemu/cache-utils.h"
56 #include "qemu/range.h"
58 //#define DEBUG_SUBPAGE
60 #if !defined(CONFIG_USER_ONLY)
61 static bool in_migration;
63 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
65 static MemoryRegion *system_memory;
66 static MemoryRegion *system_io;
68 AddressSpace address_space_io;
69 AddressSpace address_space_memory;
71 MemoryRegion io_mem_rom, io_mem_notdirty;
72 static MemoryRegion io_mem_unassigned;
74 #endif
76 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
77 /* current CPU in the current thread. It is only valid inside
78 cpu_exec() */
79 DEFINE_TLS(CPUState *, current_cpu);
80 /* 0 = Do not count executed instructions.
81 1 = Precise instruction counting.
82 2 = Adaptive rate instruction counting. */
83 int use_icount;
85 #if !defined(CONFIG_USER_ONLY)
87 typedef struct PhysPageEntry PhysPageEntry;
89 struct PhysPageEntry {
90 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
91 uint32_t skip : 6;
92 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
93 uint32_t ptr : 26;
96 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
98 /* Size of the L2 (and L3, etc) page tables. */
99 #define ADDR_SPACE_BITS 64
101 #define P_L2_BITS 9
102 #define P_L2_SIZE (1 << P_L2_BITS)
104 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
106 typedef PhysPageEntry Node[P_L2_SIZE];
108 typedef struct PhysPageMap {
109 unsigned sections_nb;
110 unsigned sections_nb_alloc;
111 unsigned nodes_nb;
112 unsigned nodes_nb_alloc;
113 Node *nodes;
114 MemoryRegionSection *sections;
115 } PhysPageMap;
117 struct AddressSpaceDispatch {
118 /* This is a multi-level map on the physical address space.
119 * The bottom level has pointers to MemoryRegionSections.
121 PhysPageEntry phys_map;
122 PhysPageMap map;
123 AddressSpace *as;
126 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
127 typedef struct subpage_t {
128 MemoryRegion iomem;
129 AddressSpace *as;
130 hwaddr base;
131 uint16_t sub_section[TARGET_PAGE_SIZE];
132 } subpage_t;
134 #define PHYS_SECTION_UNASSIGNED 0
135 #define PHYS_SECTION_NOTDIRTY 1
136 #define PHYS_SECTION_ROM 2
137 #define PHYS_SECTION_WATCH 3
139 static void io_mem_init(void);
140 static void memory_map_init(void);
142 static MemoryRegion io_mem_watch;
143 #endif
145 #if !defined(CONFIG_USER_ONLY)
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
158 unsigned i;
159 uint32_t ret;
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
168 return ret;
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
188 } else {
189 p = map->nodes[lp->ptr];
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
202 ++lp;
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
248 assert(valid_ptr < P_L2_SIZE);
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271 DECLARE_BITMAP(compacted, nodes_nb);
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
312 MemoryRegionSection *section;
313 subpage_t *subpage;
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
320 return section;
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
327 MemoryRegionSection *section;
328 Int128 diff;
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
342 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
343 hwaddr *xlat, hwaddr *plen,
344 bool is_write)
346 IOMMUTLBEntry iotlb;
347 MemoryRegionSection *section;
348 MemoryRegion *mr;
349 hwaddr len = *plen;
351 for (;;) {
352 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
353 mr = section->mr;
355 if (!mr->iommu_ops) {
356 break;
359 iotlb = mr->iommu_ops->translate(mr, addr);
360 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
361 | (addr & iotlb.addr_mask));
362 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
363 if (!(iotlb.perm & (1 << is_write))) {
364 mr = &io_mem_unassigned;
365 break;
368 as = iotlb.target_as;
371 *plen = len;
372 *xlat = addr;
373 return mr;
376 MemoryRegionSection *
377 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
378 hwaddr *plen)
380 MemoryRegionSection *section;
381 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
383 assert(!section->mr->iommu_ops);
384 return section;
386 #endif
388 void cpu_exec_init_all(void)
390 #if !defined(CONFIG_USER_ONLY)
391 qemu_mutex_init(&ram_list.mutex);
392 memory_map_init();
393 io_mem_init();
394 #endif
397 #if !defined(CONFIG_USER_ONLY)
399 static int cpu_common_post_load(void *opaque, int version_id)
401 CPUState *cpu = opaque;
403 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
404 version_id is increased. */
405 cpu->interrupt_request &= ~0x01;
406 tlb_flush(cpu->env_ptr, 1);
408 return 0;
411 const VMStateDescription vmstate_cpu_common = {
412 .name = "cpu_common",
413 .version_id = 1,
414 .minimum_version_id = 1,
415 .minimum_version_id_old = 1,
416 .post_load = cpu_common_post_load,
417 .fields = (VMStateField []) {
418 VMSTATE_UINT32(halted, CPUState),
419 VMSTATE_UINT32(interrupt_request, CPUState),
420 VMSTATE_END_OF_LIST()
424 #endif
426 CPUState *qemu_get_cpu(int index)
428 CPUState *cpu;
430 CPU_FOREACH(cpu) {
431 if (cpu->cpu_index == index) {
432 return cpu;
436 return NULL;
439 void cpu_exec_init(CPUArchState *env)
441 CPUState *cpu = ENV_GET_CPU(env);
442 CPUClass *cc = CPU_GET_CLASS(cpu);
443 CPUState *some_cpu;
444 int cpu_index;
446 #if defined(CONFIG_USER_ONLY)
447 cpu_list_lock();
448 #endif
449 cpu_index = 0;
450 CPU_FOREACH(some_cpu) {
451 cpu_index++;
453 cpu->cpu_index = cpu_index;
454 cpu->numa_node = 0;
455 QTAILQ_INIT(&env->breakpoints);
456 QTAILQ_INIT(&env->watchpoints);
457 #ifndef CONFIG_USER_ONLY
458 cpu->thread_id = qemu_get_thread_id();
459 #endif
460 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
461 #if defined(CONFIG_USER_ONLY)
462 cpu_list_unlock();
463 #endif
464 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
465 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
467 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
468 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
469 cpu_save, cpu_load, env);
470 assert(cc->vmsd == NULL);
471 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
472 #endif
473 if (cc->vmsd != NULL) {
474 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
478 #if defined(TARGET_HAS_ICE)
479 #if defined(CONFIG_USER_ONLY)
480 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
482 tb_invalidate_phys_page_range(pc, pc + 1, 0);
484 #else
485 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
487 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
488 if (phys != -1) {
489 tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
492 #endif
493 #endif /* TARGET_HAS_ICE */
495 #if defined(CONFIG_USER_ONLY)
496 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
501 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
502 int flags, CPUWatchpoint **watchpoint)
504 return -ENOSYS;
506 #else
507 /* Add a watchpoint. */
508 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
509 int flags, CPUWatchpoint **watchpoint)
511 target_ulong len_mask = ~(len - 1);
512 CPUWatchpoint *wp;
514 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
515 if ((len & (len - 1)) || (addr & ~len_mask) ||
516 len == 0 || len > TARGET_PAGE_SIZE) {
517 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
518 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
519 return -EINVAL;
521 wp = g_malloc(sizeof(*wp));
523 wp->vaddr = addr;
524 wp->len_mask = len_mask;
525 wp->flags = flags;
527 /* keep all GDB-injected watchpoints in front */
528 if (flags & BP_GDB)
529 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
530 else
531 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
533 tlb_flush_page(env, addr);
535 if (watchpoint)
536 *watchpoint = wp;
537 return 0;
540 /* Remove a specific watchpoint. */
541 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
542 int flags)
544 target_ulong len_mask = ~(len - 1);
545 CPUWatchpoint *wp;
547 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
548 if (addr == wp->vaddr && len_mask == wp->len_mask
549 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
550 cpu_watchpoint_remove_by_ref(env, wp);
551 return 0;
554 return -ENOENT;
557 /* Remove a specific watchpoint by reference. */
558 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
560 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
562 tlb_flush_page(env, watchpoint->vaddr);
564 g_free(watchpoint);
567 /* Remove all matching watchpoints. */
568 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
570 CPUWatchpoint *wp, *next;
572 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
573 if (wp->flags & mask)
574 cpu_watchpoint_remove_by_ref(env, wp);
577 #endif
579 /* Add a breakpoint. */
580 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
581 CPUBreakpoint **breakpoint)
583 #if defined(TARGET_HAS_ICE)
584 CPUBreakpoint *bp;
586 bp = g_malloc(sizeof(*bp));
588 bp->pc = pc;
589 bp->flags = flags;
591 /* keep all GDB-injected breakpoints in front */
592 if (flags & BP_GDB) {
593 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
594 } else {
595 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
598 breakpoint_invalidate(ENV_GET_CPU(env), pc);
600 if (breakpoint) {
601 *breakpoint = bp;
603 return 0;
604 #else
605 return -ENOSYS;
606 #endif
609 /* Remove a specific breakpoint. */
610 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
612 #if defined(TARGET_HAS_ICE)
613 CPUBreakpoint *bp;
615 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
616 if (bp->pc == pc && bp->flags == flags) {
617 cpu_breakpoint_remove_by_ref(env, bp);
618 return 0;
621 return -ENOENT;
622 #else
623 return -ENOSYS;
624 #endif
627 /* Remove a specific breakpoint by reference. */
628 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
630 #if defined(TARGET_HAS_ICE)
631 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
633 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
635 g_free(breakpoint);
636 #endif
639 /* Remove all matching breakpoints. */
640 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
642 #if defined(TARGET_HAS_ICE)
643 CPUBreakpoint *bp, *next;
645 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
646 if (bp->flags & mask)
647 cpu_breakpoint_remove_by_ref(env, bp);
649 #endif
652 /* enable or disable single step mode. EXCP_DEBUG is returned by the
653 CPU loop after each instruction */
654 void cpu_single_step(CPUState *cpu, int enabled)
656 #if defined(TARGET_HAS_ICE)
657 if (cpu->singlestep_enabled != enabled) {
658 cpu->singlestep_enabled = enabled;
659 if (kvm_enabled()) {
660 kvm_update_guest_debug(cpu, 0);
661 } else {
662 /* must flush all the translated code to avoid inconsistencies */
663 /* XXX: only flush what is necessary */
664 CPUArchState *env = cpu->env_ptr;
665 tb_flush(env);
668 #endif
671 void cpu_abort(CPUArchState *env, const char *fmt, ...)
673 CPUState *cpu = ENV_GET_CPU(env);
674 va_list ap;
675 va_list ap2;
677 va_start(ap, fmt);
678 va_copy(ap2, ap);
679 fprintf(stderr, "qemu: fatal: ");
680 vfprintf(stderr, fmt, ap);
681 fprintf(stderr, "\n");
682 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
683 if (qemu_log_enabled()) {
684 qemu_log("qemu: fatal: ");
685 qemu_log_vprintf(fmt, ap2);
686 qemu_log("\n");
687 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
688 qemu_log_flush();
689 qemu_log_close();
691 va_end(ap2);
692 va_end(ap);
693 #if defined(CONFIG_USER_ONLY)
695 struct sigaction act;
696 sigfillset(&act.sa_mask);
697 act.sa_handler = SIG_DFL;
698 sigaction(SIGABRT, &act, NULL);
700 #endif
701 abort();
704 #if !defined(CONFIG_USER_ONLY)
705 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
707 RAMBlock *block;
709 /* The list is protected by the iothread lock here. */
710 block = ram_list.mru_block;
711 if (block && addr - block->offset < block->length) {
712 goto found;
714 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
715 if (addr - block->offset < block->length) {
716 goto found;
720 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
721 abort();
723 found:
724 ram_list.mru_block = block;
725 return block;
728 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
730 ram_addr_t start1;
731 RAMBlock *block;
732 ram_addr_t end;
734 end = TARGET_PAGE_ALIGN(start + length);
735 start &= TARGET_PAGE_MASK;
737 block = qemu_get_ram_block(start);
738 assert(block == qemu_get_ram_block(end - 1));
739 start1 = (uintptr_t)block->host + (start - block->offset);
740 cpu_tlb_reset_dirty_all(start1, length);
743 /* Note: start and end must be within the same ram block. */
744 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
745 unsigned client)
747 if (length == 0)
748 return;
749 cpu_physical_memory_clear_dirty_range(start, length, client);
751 if (tcg_enabled()) {
752 tlb_reset_dirty_range_all(start, length);
756 static void cpu_physical_memory_set_dirty_tracking(bool enable)
758 in_migration = enable;
761 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
762 MemoryRegionSection *section,
763 target_ulong vaddr,
764 hwaddr paddr, hwaddr xlat,
765 int prot,
766 target_ulong *address)
768 hwaddr iotlb;
769 CPUWatchpoint *wp;
771 if (memory_region_is_ram(section->mr)) {
772 /* Normal RAM. */
773 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
774 + xlat;
775 if (!section->readonly) {
776 iotlb |= PHYS_SECTION_NOTDIRTY;
777 } else {
778 iotlb |= PHYS_SECTION_ROM;
780 } else {
781 iotlb = section - address_space_memory.dispatch->map.sections;
782 iotlb += xlat;
785 /* Make accesses to pages with watchpoints go via the
786 watchpoint trap routines. */
787 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
788 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
789 /* Avoid trapping reads of pages with a write breakpoint. */
790 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
791 iotlb = PHYS_SECTION_WATCH + paddr;
792 *address |= TLB_MMIO;
793 break;
798 return iotlb;
800 #endif /* defined(CONFIG_USER_ONLY) */
802 #if !defined(CONFIG_USER_ONLY)
804 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
805 uint16_t section);
806 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
808 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
811 * Set a custom physical guest memory alloator.
812 * Accelerators with unusual needs may need this. Hopefully, we can
813 * get rid of it eventually.
815 void phys_mem_set_alloc(void *(*alloc)(size_t))
817 phys_mem_alloc = alloc;
820 static uint16_t phys_section_add(PhysPageMap *map,
821 MemoryRegionSection *section)
823 /* The physical section number is ORed with a page-aligned
824 * pointer to produce the iotlb entries. Thus it should
825 * never overflow into the page-aligned value.
827 assert(map->sections_nb < TARGET_PAGE_SIZE);
829 if (map->sections_nb == map->sections_nb_alloc) {
830 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
831 map->sections = g_renew(MemoryRegionSection, map->sections,
832 map->sections_nb_alloc);
834 map->sections[map->sections_nb] = *section;
835 memory_region_ref(section->mr);
836 return map->sections_nb++;
839 static void phys_section_destroy(MemoryRegion *mr)
841 memory_region_unref(mr);
843 if (mr->subpage) {
844 subpage_t *subpage = container_of(mr, subpage_t, iomem);
845 memory_region_destroy(&subpage->iomem);
846 g_free(subpage);
850 static void phys_sections_free(PhysPageMap *map)
852 while (map->sections_nb > 0) {
853 MemoryRegionSection *section = &map->sections[--map->sections_nb];
854 phys_section_destroy(section->mr);
856 g_free(map->sections);
857 g_free(map->nodes);
860 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
862 subpage_t *subpage;
863 hwaddr base = section->offset_within_address_space
864 & TARGET_PAGE_MASK;
865 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
866 d->map.nodes, d->map.sections);
867 MemoryRegionSection subsection = {
868 .offset_within_address_space = base,
869 .size = int128_make64(TARGET_PAGE_SIZE),
871 hwaddr start, end;
873 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
875 if (!(existing->mr->subpage)) {
876 subpage = subpage_init(d->as, base);
877 subsection.mr = &subpage->iomem;
878 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
879 phys_section_add(&d->map, &subsection));
880 } else {
881 subpage = container_of(existing->mr, subpage_t, iomem);
883 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
884 end = start + int128_get64(section->size) - 1;
885 subpage_register(subpage, start, end,
886 phys_section_add(&d->map, section));
890 static void register_multipage(AddressSpaceDispatch *d,
891 MemoryRegionSection *section)
893 hwaddr start_addr = section->offset_within_address_space;
894 uint16_t section_index = phys_section_add(&d->map, section);
895 uint64_t num_pages = int128_get64(int128_rshift(section->size,
896 TARGET_PAGE_BITS));
898 assert(num_pages);
899 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
902 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
904 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
905 AddressSpaceDispatch *d = as->next_dispatch;
906 MemoryRegionSection now = *section, remain = *section;
907 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
909 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
910 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
911 - now.offset_within_address_space;
913 now.size = int128_min(int128_make64(left), now.size);
914 register_subpage(d, &now);
915 } else {
916 now.size = int128_zero();
918 while (int128_ne(remain.size, now.size)) {
919 remain.size = int128_sub(remain.size, now.size);
920 remain.offset_within_address_space += int128_get64(now.size);
921 remain.offset_within_region += int128_get64(now.size);
922 now = remain;
923 if (int128_lt(remain.size, page_size)) {
924 register_subpage(d, &now);
925 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
926 now.size = page_size;
927 register_subpage(d, &now);
928 } else {
929 now.size = int128_and(now.size, int128_neg(page_size));
930 register_multipage(d, &now);
935 void qemu_flush_coalesced_mmio_buffer(void)
937 if (kvm_enabled())
938 kvm_flush_coalesced_mmio_buffer();
941 void qemu_mutex_lock_ramlist(void)
943 qemu_mutex_lock(&ram_list.mutex);
946 void qemu_mutex_unlock_ramlist(void)
948 qemu_mutex_unlock(&ram_list.mutex);
951 #ifdef __linux__
953 #include <sys/vfs.h>
955 #define HUGETLBFS_MAGIC 0x958458f6
957 static long gethugepagesize(const char *path)
959 struct statfs fs;
960 int ret;
962 do {
963 ret = statfs(path, &fs);
964 } while (ret != 0 && errno == EINTR);
966 if (ret != 0) {
967 perror(path);
968 return 0;
971 if (fs.f_type != HUGETLBFS_MAGIC)
972 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
974 return fs.f_bsize;
977 static sigjmp_buf sigjump;
979 static void sigbus_handler(int signal)
981 siglongjmp(sigjump, 1);
984 static void *file_ram_alloc(RAMBlock *block,
985 ram_addr_t memory,
986 const char *path)
988 char *filename;
989 char *sanitized_name;
990 char *c;
991 void *area;
992 int fd;
993 unsigned long hpagesize;
995 hpagesize = gethugepagesize(path);
996 if (!hpagesize) {
997 return NULL;
1000 if (memory < hpagesize) {
1001 return NULL;
1004 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1005 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1006 return NULL;
1009 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1010 sanitized_name = g_strdup(block->mr->name);
1011 for (c = sanitized_name; *c != '\0'; c++) {
1012 if (*c == '/')
1013 *c = '_';
1016 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1017 sanitized_name);
1018 g_free(sanitized_name);
1020 fd = mkstemp(filename);
1021 if (fd < 0) {
1022 perror("unable to create backing store for hugepages");
1023 g_free(filename);
1024 return NULL;
1026 unlink(filename);
1027 g_free(filename);
1029 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1032 * ftruncate is not supported by hugetlbfs in older
1033 * hosts, so don't bother bailing out on errors.
1034 * If anything goes wrong with it under other filesystems,
1035 * mmap will fail.
1037 if (ftruncate(fd, memory))
1038 perror("ftruncate");
1040 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1041 if (area == MAP_FAILED) {
1042 perror("file_ram_alloc: can't mmap RAM pages");
1043 close(fd);
1044 return (NULL);
1047 if (mem_prealloc) {
1048 int ret, i;
1049 struct sigaction act, oldact;
1050 sigset_t set, oldset;
1052 memset(&act, 0, sizeof(act));
1053 act.sa_handler = &sigbus_handler;
1054 act.sa_flags = 0;
1056 ret = sigaction(SIGBUS, &act, &oldact);
1057 if (ret) {
1058 perror("file_ram_alloc: failed to install signal handler");
1059 exit(1);
1062 /* unblock SIGBUS */
1063 sigemptyset(&set);
1064 sigaddset(&set, SIGBUS);
1065 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1067 if (sigsetjmp(sigjump, 1)) {
1068 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1069 exit(1);
1072 /* MAP_POPULATE silently ignores failures */
1073 for (i = 0; i < (memory/hpagesize); i++) {
1074 memset(area + (hpagesize*i), 0, 1);
1077 ret = sigaction(SIGBUS, &oldact, NULL);
1078 if (ret) {
1079 perror("file_ram_alloc: failed to reinstall signal handler");
1080 exit(1);
1083 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1086 block->fd = fd;
1087 return area;
1089 #else
1090 static void *file_ram_alloc(RAMBlock *block,
1091 ram_addr_t memory,
1092 const char *path)
1094 fprintf(stderr, "-mem-path not supported on this host\n");
1095 exit(1);
1097 #endif
1099 static ram_addr_t find_ram_offset(ram_addr_t size)
1101 RAMBlock *block, *next_block;
1102 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1104 assert(size != 0); /* it would hand out same offset multiple times */
1106 if (QTAILQ_EMPTY(&ram_list.blocks))
1107 return 0;
1109 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1110 ram_addr_t end, next = RAM_ADDR_MAX;
1112 end = block->offset + block->length;
1114 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1115 if (next_block->offset >= end) {
1116 next = MIN(next, next_block->offset);
1119 if (next - end >= size && next - end < mingap) {
1120 offset = end;
1121 mingap = next - end;
1125 if (offset == RAM_ADDR_MAX) {
1126 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1127 (uint64_t)size);
1128 abort();
1131 return offset;
1134 ram_addr_t last_ram_offset(void)
1136 RAMBlock *block;
1137 ram_addr_t last = 0;
1139 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1140 last = MAX(last, block->offset + block->length);
1142 return last;
1145 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1147 int ret;
1149 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1150 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1151 "dump-guest-core", true)) {
1152 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1153 if (ret) {
1154 perror("qemu_madvise");
1155 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1156 "but dump_guest_core=off specified\n");
1161 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1163 RAMBlock *new_block, *block;
1165 new_block = NULL;
1166 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1167 if (block->offset == addr) {
1168 new_block = block;
1169 break;
1172 assert(new_block);
1173 assert(!new_block->idstr[0]);
1175 if (dev) {
1176 char *id = qdev_get_dev_path(dev);
1177 if (id) {
1178 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1179 g_free(id);
1182 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1184 /* This assumes the iothread lock is taken here too. */
1185 qemu_mutex_lock_ramlist();
1186 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1187 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1188 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1189 new_block->idstr);
1190 abort();
1193 qemu_mutex_unlock_ramlist();
1196 static int memory_try_enable_merging(void *addr, size_t len)
1198 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1199 /* disabled by the user */
1200 return 0;
1203 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1206 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1207 MemoryRegion *mr)
1209 RAMBlock *block, *new_block;
1210 ram_addr_t old_ram_size, new_ram_size;
1212 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1214 size = TARGET_PAGE_ALIGN(size);
1215 new_block = g_malloc0(sizeof(*new_block));
1216 new_block->fd = -1;
1218 /* This assumes the iothread lock is taken here too. */
1219 qemu_mutex_lock_ramlist();
1220 new_block->mr = mr;
1221 new_block->offset = find_ram_offset(size);
1222 if (host) {
1223 new_block->host = host;
1224 new_block->flags |= RAM_PREALLOC_MASK;
1225 } else if (xen_enabled()) {
1226 if (mem_path) {
1227 fprintf(stderr, "-mem-path not supported with Xen\n");
1228 exit(1);
1230 xen_ram_alloc(new_block->offset, size, mr);
1231 } else {
1232 if (mem_path) {
1233 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1235 * file_ram_alloc() needs to allocate just like
1236 * phys_mem_alloc, but we haven't bothered to provide
1237 * a hook there.
1239 fprintf(stderr,
1240 "-mem-path not supported with this accelerator\n");
1241 exit(1);
1243 new_block->host = file_ram_alloc(new_block, size, mem_path);
1245 if (!new_block->host) {
1246 new_block->host = phys_mem_alloc(size);
1247 if (!new_block->host) {
1248 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1249 new_block->mr->name, strerror(errno));
1250 exit(1);
1252 memory_try_enable_merging(new_block->host, size);
1255 new_block->length = size;
1257 /* Keep the list sorted from biggest to smallest block. */
1258 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1259 if (block->length < new_block->length) {
1260 break;
1263 if (block) {
1264 QTAILQ_INSERT_BEFORE(block, new_block, next);
1265 } else {
1266 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1268 ram_list.mru_block = NULL;
1270 ram_list.version++;
1271 qemu_mutex_unlock_ramlist();
1273 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1275 if (new_ram_size > old_ram_size) {
1276 int i;
1277 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1278 ram_list.dirty_memory[i] =
1279 bitmap_zero_extend(ram_list.dirty_memory[i],
1280 old_ram_size, new_ram_size);
1283 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1285 qemu_ram_setup_dump(new_block->host, size);
1286 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1287 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1289 if (kvm_enabled())
1290 kvm_setup_guest_memory(new_block->host, size);
1292 return new_block->offset;
1295 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1297 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1300 void qemu_ram_free_from_ptr(ram_addr_t addr)
1302 RAMBlock *block;
1304 /* This assumes the iothread lock is taken here too. */
1305 qemu_mutex_lock_ramlist();
1306 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1307 if (addr == block->offset) {
1308 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1309 ram_list.mru_block = NULL;
1310 ram_list.version++;
1311 g_free(block);
1312 break;
1315 qemu_mutex_unlock_ramlist();
1318 void qemu_ram_free(ram_addr_t addr)
1320 RAMBlock *block;
1322 /* This assumes the iothread lock is taken here too. */
1323 qemu_mutex_lock_ramlist();
1324 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1325 if (addr == block->offset) {
1326 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1327 ram_list.mru_block = NULL;
1328 ram_list.version++;
1329 if (block->flags & RAM_PREALLOC_MASK) {
1331 } else if (xen_enabled()) {
1332 xen_invalidate_map_cache_entry(block->host);
1333 #ifndef _WIN32
1334 } else if (block->fd >= 0) {
1335 munmap(block->host, block->length);
1336 close(block->fd);
1337 #endif
1338 } else {
1339 qemu_anon_ram_free(block->host, block->length);
1341 g_free(block);
1342 break;
1345 qemu_mutex_unlock_ramlist();
1349 #ifndef _WIN32
1350 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1352 RAMBlock *block;
1353 ram_addr_t offset;
1354 int flags;
1355 void *area, *vaddr;
1357 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1358 offset = addr - block->offset;
1359 if (offset < block->length) {
1360 vaddr = block->host + offset;
1361 if (block->flags & RAM_PREALLOC_MASK) {
1363 } else if (xen_enabled()) {
1364 abort();
1365 } else {
1366 flags = MAP_FIXED;
1367 munmap(vaddr, length);
1368 if (block->fd >= 0) {
1369 #ifdef MAP_POPULATE
1370 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1371 MAP_PRIVATE;
1372 #else
1373 flags |= MAP_PRIVATE;
1374 #endif
1375 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1376 flags, block->fd, offset);
1377 } else {
1379 * Remap needs to match alloc. Accelerators that
1380 * set phys_mem_alloc never remap. If they did,
1381 * we'd need a remap hook here.
1383 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1385 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1386 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1387 flags, -1, 0);
1389 if (area != vaddr) {
1390 fprintf(stderr, "Could not remap addr: "
1391 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1392 length, addr);
1393 exit(1);
1395 memory_try_enable_merging(vaddr, length);
1396 qemu_ram_setup_dump(vaddr, length);
1398 return;
1402 #endif /* !_WIN32 */
1404 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1405 With the exception of the softmmu code in this file, this should
1406 only be used for local memory (e.g. video ram) that the device owns,
1407 and knows it isn't going to access beyond the end of the block.
1409 It should not be used for general purpose DMA.
1410 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1412 void *qemu_get_ram_ptr(ram_addr_t addr)
1414 RAMBlock *block = qemu_get_ram_block(addr);
1416 if (xen_enabled()) {
1417 /* We need to check if the requested address is in the RAM
1418 * because we don't want to map the entire memory in QEMU.
1419 * In that case just map until the end of the page.
1421 if (block->offset == 0) {
1422 return xen_map_cache(addr, 0, 0);
1423 } else if (block->host == NULL) {
1424 block->host =
1425 xen_map_cache(block->offset, block->length, 1);
1428 return block->host + (addr - block->offset);
1431 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1432 * but takes a size argument */
1433 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1435 if (*size == 0) {
1436 return NULL;
1438 if (xen_enabled()) {
1439 return xen_map_cache(addr, *size, 1);
1440 } else {
1441 RAMBlock *block;
1443 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1444 if (addr - block->offset < block->length) {
1445 if (addr - block->offset + *size > block->length)
1446 *size = block->length - addr + block->offset;
1447 return block->host + (addr - block->offset);
1451 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1452 abort();
1456 /* Some of the softmmu routines need to translate from a host pointer
1457 (typically a TLB entry) back to a ram offset. */
1458 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1460 RAMBlock *block;
1461 uint8_t *host = ptr;
1463 if (xen_enabled()) {
1464 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1465 return qemu_get_ram_block(*ram_addr)->mr;
1468 block = ram_list.mru_block;
1469 if (block && block->host && host - block->host < block->length) {
1470 goto found;
1473 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1474 /* This case append when the block is not mapped. */
1475 if (block->host == NULL) {
1476 continue;
1478 if (host - block->host < block->length) {
1479 goto found;
1483 return NULL;
1485 found:
1486 *ram_addr = block->offset + (host - block->host);
1487 return block->mr;
1490 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1491 uint64_t val, unsigned size)
1493 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1494 tb_invalidate_phys_page_fast(ram_addr, size);
1496 switch (size) {
1497 case 1:
1498 stb_p(qemu_get_ram_ptr(ram_addr), val);
1499 break;
1500 case 2:
1501 stw_p(qemu_get_ram_ptr(ram_addr), val);
1502 break;
1503 case 4:
1504 stl_p(qemu_get_ram_ptr(ram_addr), val);
1505 break;
1506 default:
1507 abort();
1509 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1510 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1511 /* we remove the notdirty callback only if the code has been
1512 flushed */
1513 if (!cpu_physical_memory_is_clean(ram_addr)) {
1514 CPUArchState *env = current_cpu->env_ptr;
1515 tlb_set_dirty(env, env->mem_io_vaddr);
1519 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1520 unsigned size, bool is_write)
1522 return is_write;
1525 static const MemoryRegionOps notdirty_mem_ops = {
1526 .write = notdirty_mem_write,
1527 .valid.accepts = notdirty_mem_accepts,
1528 .endianness = DEVICE_NATIVE_ENDIAN,
1531 /* Generate a debug exception if a watchpoint has been hit. */
1532 static void check_watchpoint(int offset, int len_mask, int flags)
1534 CPUArchState *env = current_cpu->env_ptr;
1535 target_ulong pc, cs_base;
1536 target_ulong vaddr;
1537 CPUWatchpoint *wp;
1538 int cpu_flags;
1540 if (env->watchpoint_hit) {
1541 /* We re-entered the check after replacing the TB. Now raise
1542 * the debug interrupt so that is will trigger after the
1543 * current instruction. */
1544 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1545 return;
1547 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1548 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1549 if ((vaddr == (wp->vaddr & len_mask) ||
1550 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1551 wp->flags |= BP_WATCHPOINT_HIT;
1552 if (!env->watchpoint_hit) {
1553 env->watchpoint_hit = wp;
1554 tb_check_watchpoint(env);
1555 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1556 env->exception_index = EXCP_DEBUG;
1557 cpu_loop_exit(env);
1558 } else {
1559 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1560 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1561 cpu_resume_from_signal(env, NULL);
1564 } else {
1565 wp->flags &= ~BP_WATCHPOINT_HIT;
1570 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1571 so these check for a hit then pass through to the normal out-of-line
1572 phys routines. */
1573 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1574 unsigned size)
1576 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1577 switch (size) {
1578 case 1: return ldub_phys(addr);
1579 case 2: return lduw_phys(addr);
1580 case 4: return ldl_phys(addr);
1581 default: abort();
1585 static void watch_mem_write(void *opaque, hwaddr addr,
1586 uint64_t val, unsigned size)
1588 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1589 switch (size) {
1590 case 1:
1591 stb_phys(addr, val);
1592 break;
1593 case 2:
1594 stw_phys(addr, val);
1595 break;
1596 case 4:
1597 stl_phys(addr, val);
1598 break;
1599 default: abort();
1603 static const MemoryRegionOps watch_mem_ops = {
1604 .read = watch_mem_read,
1605 .write = watch_mem_write,
1606 .endianness = DEVICE_NATIVE_ENDIAN,
1609 static uint64_t subpage_read(void *opaque, hwaddr addr,
1610 unsigned len)
1612 subpage_t *subpage = opaque;
1613 uint8_t buf[4];
1615 #if defined(DEBUG_SUBPAGE)
1616 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1617 subpage, len, addr);
1618 #endif
1619 address_space_read(subpage->as, addr + subpage->base, buf, len);
1620 switch (len) {
1621 case 1:
1622 return ldub_p(buf);
1623 case 2:
1624 return lduw_p(buf);
1625 case 4:
1626 return ldl_p(buf);
1627 default:
1628 abort();
1632 static void subpage_write(void *opaque, hwaddr addr,
1633 uint64_t value, unsigned len)
1635 subpage_t *subpage = opaque;
1636 uint8_t buf[4];
1638 #if defined(DEBUG_SUBPAGE)
1639 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1640 " value %"PRIx64"\n",
1641 __func__, subpage, len, addr, value);
1642 #endif
1643 switch (len) {
1644 case 1:
1645 stb_p(buf, value);
1646 break;
1647 case 2:
1648 stw_p(buf, value);
1649 break;
1650 case 4:
1651 stl_p(buf, value);
1652 break;
1653 default:
1654 abort();
1656 address_space_write(subpage->as, addr + subpage->base, buf, len);
1659 static bool subpage_accepts(void *opaque, hwaddr addr,
1660 unsigned len, bool is_write)
1662 subpage_t *subpage = opaque;
1663 #if defined(DEBUG_SUBPAGE)
1664 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1665 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1666 #endif
1668 return address_space_access_valid(subpage->as, addr + subpage->base,
1669 len, is_write);
1672 static const MemoryRegionOps subpage_ops = {
1673 .read = subpage_read,
1674 .write = subpage_write,
1675 .valid.accepts = subpage_accepts,
1676 .endianness = DEVICE_NATIVE_ENDIAN,
1679 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1680 uint16_t section)
1682 int idx, eidx;
1684 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1685 return -1;
1686 idx = SUBPAGE_IDX(start);
1687 eidx = SUBPAGE_IDX(end);
1688 #if defined(DEBUG_SUBPAGE)
1689 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1690 __func__, mmio, start, end, idx, eidx, section);
1691 #endif
1692 for (; idx <= eidx; idx++) {
1693 mmio->sub_section[idx] = section;
1696 return 0;
1699 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1701 subpage_t *mmio;
1703 mmio = g_malloc0(sizeof(subpage_t));
1705 mmio->as = as;
1706 mmio->base = base;
1707 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1708 "subpage", TARGET_PAGE_SIZE);
1709 mmio->iomem.subpage = true;
1710 #if defined(DEBUG_SUBPAGE)
1711 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1712 mmio, base, TARGET_PAGE_SIZE);
1713 #endif
1714 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1716 return mmio;
1719 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1721 MemoryRegionSection section = {
1722 .mr = mr,
1723 .offset_within_address_space = 0,
1724 .offset_within_region = 0,
1725 .size = int128_2_64(),
1728 return phys_section_add(map, &section);
1731 MemoryRegion *iotlb_to_region(hwaddr index)
1733 return address_space_memory.dispatch->map.sections[
1734 index & ~TARGET_PAGE_MASK].mr;
1737 static void io_mem_init(void)
1739 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1740 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1741 "unassigned", UINT64_MAX);
1742 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1743 "notdirty", UINT64_MAX);
1744 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1745 "watch", UINT64_MAX);
1748 static void mem_begin(MemoryListener *listener)
1750 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1751 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1752 uint16_t n;
1754 n = dummy_section(&d->map, &io_mem_unassigned);
1755 assert(n == PHYS_SECTION_UNASSIGNED);
1756 n = dummy_section(&d->map, &io_mem_notdirty);
1757 assert(n == PHYS_SECTION_NOTDIRTY);
1758 n = dummy_section(&d->map, &io_mem_rom);
1759 assert(n == PHYS_SECTION_ROM);
1760 n = dummy_section(&d->map, &io_mem_watch);
1761 assert(n == PHYS_SECTION_WATCH);
1763 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1764 d->as = as;
1765 as->next_dispatch = d;
1768 static void mem_commit(MemoryListener *listener)
1770 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1771 AddressSpaceDispatch *cur = as->dispatch;
1772 AddressSpaceDispatch *next = as->next_dispatch;
1774 phys_page_compact_all(next, next->map.nodes_nb);
1776 as->dispatch = next;
1778 if (cur) {
1779 phys_sections_free(&cur->map);
1780 g_free(cur);
1784 static void tcg_commit(MemoryListener *listener)
1786 CPUState *cpu;
1788 /* since each CPU stores ram addresses in its TLB cache, we must
1789 reset the modified entries */
1790 /* XXX: slow ! */
1791 CPU_FOREACH(cpu) {
1792 CPUArchState *env = cpu->env_ptr;
1794 tlb_flush(env, 1);
1798 static void core_log_global_start(MemoryListener *listener)
1800 cpu_physical_memory_set_dirty_tracking(true);
1803 static void core_log_global_stop(MemoryListener *listener)
1805 cpu_physical_memory_set_dirty_tracking(false);
1808 static MemoryListener core_memory_listener = {
1809 .log_global_start = core_log_global_start,
1810 .log_global_stop = core_log_global_stop,
1811 .priority = 1,
1814 static MemoryListener tcg_memory_listener = {
1815 .commit = tcg_commit,
1818 void address_space_init_dispatch(AddressSpace *as)
1820 as->dispatch = NULL;
1821 as->dispatch_listener = (MemoryListener) {
1822 .begin = mem_begin,
1823 .commit = mem_commit,
1824 .region_add = mem_add,
1825 .region_nop = mem_add,
1826 .priority = 0,
1828 memory_listener_register(&as->dispatch_listener, as);
1831 void address_space_destroy_dispatch(AddressSpace *as)
1833 AddressSpaceDispatch *d = as->dispatch;
1835 memory_listener_unregister(&as->dispatch_listener);
1836 g_free(d);
1837 as->dispatch = NULL;
1840 static void memory_map_init(void)
1842 system_memory = g_malloc(sizeof(*system_memory));
1844 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1845 address_space_init(&address_space_memory, system_memory, "memory");
1847 system_io = g_malloc(sizeof(*system_io));
1848 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1849 65536);
1850 address_space_init(&address_space_io, system_io, "I/O");
1852 memory_listener_register(&core_memory_listener, &address_space_memory);
1853 if (tcg_enabled()) {
1854 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1858 MemoryRegion *get_system_memory(void)
1860 return system_memory;
1863 MemoryRegion *get_system_io(void)
1865 return system_io;
1868 #endif /* !defined(CONFIG_USER_ONLY) */
1870 /* physical memory access (slow version, mainly for debug) */
1871 #if defined(CONFIG_USER_ONLY)
1872 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1873 uint8_t *buf, int len, int is_write)
1875 int l, flags;
1876 target_ulong page;
1877 void * p;
1879 while (len > 0) {
1880 page = addr & TARGET_PAGE_MASK;
1881 l = (page + TARGET_PAGE_SIZE) - addr;
1882 if (l > len)
1883 l = len;
1884 flags = page_get_flags(page);
1885 if (!(flags & PAGE_VALID))
1886 return -1;
1887 if (is_write) {
1888 if (!(flags & PAGE_WRITE))
1889 return -1;
1890 /* XXX: this code should not depend on lock_user */
1891 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1892 return -1;
1893 memcpy(p, buf, l);
1894 unlock_user(p, addr, l);
1895 } else {
1896 if (!(flags & PAGE_READ))
1897 return -1;
1898 /* XXX: this code should not depend on lock_user */
1899 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1900 return -1;
1901 memcpy(buf, p, l);
1902 unlock_user(p, addr, 0);
1904 len -= l;
1905 buf += l;
1906 addr += l;
1908 return 0;
1911 #else
1913 static void invalidate_and_set_dirty(hwaddr addr,
1914 hwaddr length)
1916 if (cpu_physical_memory_is_clean(addr)) {
1917 /* invalidate code */
1918 tb_invalidate_phys_page_range(addr, addr + length, 0);
1919 /* set dirty bit */
1920 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1921 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1923 xen_modified_memory(addr, length);
1926 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1928 if (memory_region_is_ram(mr)) {
1929 return !(is_write && mr->readonly);
1931 if (memory_region_is_romd(mr)) {
1932 return !is_write;
1935 return false;
1938 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1940 unsigned access_size_max = mr->ops->valid.max_access_size;
1942 /* Regions are assumed to support 1-4 byte accesses unless
1943 otherwise specified. */
1944 if (access_size_max == 0) {
1945 access_size_max = 4;
1948 /* Bound the maximum access by the alignment of the address. */
1949 if (!mr->ops->impl.unaligned) {
1950 unsigned align_size_max = addr & -addr;
1951 if (align_size_max != 0 && align_size_max < access_size_max) {
1952 access_size_max = align_size_max;
1956 /* Don't attempt accesses larger than the maximum. */
1957 if (l > access_size_max) {
1958 l = access_size_max;
1960 if (l & (l - 1)) {
1961 l = 1 << (qemu_fls(l) - 1);
1964 return l;
1967 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1968 int len, bool is_write)
1970 hwaddr l;
1971 uint8_t *ptr;
1972 uint64_t val;
1973 hwaddr addr1;
1974 MemoryRegion *mr;
1975 bool error = false;
1977 while (len > 0) {
1978 l = len;
1979 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1981 if (is_write) {
1982 if (!memory_access_is_direct(mr, is_write)) {
1983 l = memory_access_size(mr, l, addr1);
1984 /* XXX: could force current_cpu to NULL to avoid
1985 potential bugs */
1986 switch (l) {
1987 case 8:
1988 /* 64 bit write access */
1989 val = ldq_p(buf);
1990 error |= io_mem_write(mr, addr1, val, 8);
1991 break;
1992 case 4:
1993 /* 32 bit write access */
1994 val = ldl_p(buf);
1995 error |= io_mem_write(mr, addr1, val, 4);
1996 break;
1997 case 2:
1998 /* 16 bit write access */
1999 val = lduw_p(buf);
2000 error |= io_mem_write(mr, addr1, val, 2);
2001 break;
2002 case 1:
2003 /* 8 bit write access */
2004 val = ldub_p(buf);
2005 error |= io_mem_write(mr, addr1, val, 1);
2006 break;
2007 default:
2008 abort();
2010 } else {
2011 addr1 += memory_region_get_ram_addr(mr);
2012 /* RAM case */
2013 ptr = qemu_get_ram_ptr(addr1);
2014 memcpy(ptr, buf, l);
2015 invalidate_and_set_dirty(addr1, l);
2017 } else {
2018 if (!memory_access_is_direct(mr, is_write)) {
2019 /* I/O case */
2020 l = memory_access_size(mr, l, addr1);
2021 switch (l) {
2022 case 8:
2023 /* 64 bit read access */
2024 error |= io_mem_read(mr, addr1, &val, 8);
2025 stq_p(buf, val);
2026 break;
2027 case 4:
2028 /* 32 bit read access */
2029 error |= io_mem_read(mr, addr1, &val, 4);
2030 stl_p(buf, val);
2031 break;
2032 case 2:
2033 /* 16 bit read access */
2034 error |= io_mem_read(mr, addr1, &val, 2);
2035 stw_p(buf, val);
2036 break;
2037 case 1:
2038 /* 8 bit read access */
2039 error |= io_mem_read(mr, addr1, &val, 1);
2040 stb_p(buf, val);
2041 break;
2042 default:
2043 abort();
2045 } else {
2046 /* RAM case */
2047 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2048 memcpy(buf, ptr, l);
2051 len -= l;
2052 buf += l;
2053 addr += l;
2056 return error;
2059 bool address_space_write(AddressSpace *as, hwaddr addr,
2060 const uint8_t *buf, int len)
2062 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2065 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2067 return address_space_rw(as, addr, buf, len, false);
2071 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2072 int len, int is_write)
2074 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2077 enum write_rom_type {
2078 WRITE_DATA,
2079 FLUSH_CACHE,
2082 static inline void cpu_physical_memory_write_rom_internal(
2083 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2085 hwaddr l;
2086 uint8_t *ptr;
2087 hwaddr addr1;
2088 MemoryRegion *mr;
2090 while (len > 0) {
2091 l = len;
2092 mr = address_space_translate(&address_space_memory,
2093 addr, &addr1, &l, true);
2095 if (!(memory_region_is_ram(mr) ||
2096 memory_region_is_romd(mr))) {
2097 /* do nothing */
2098 } else {
2099 addr1 += memory_region_get_ram_addr(mr);
2100 /* ROM/RAM case */
2101 ptr = qemu_get_ram_ptr(addr1);
2102 switch (type) {
2103 case WRITE_DATA:
2104 memcpy(ptr, buf, l);
2105 invalidate_and_set_dirty(addr1, l);
2106 break;
2107 case FLUSH_CACHE:
2108 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2109 break;
2112 len -= l;
2113 buf += l;
2114 addr += l;
2118 /* used for ROM loading : can write in RAM and ROM */
2119 void cpu_physical_memory_write_rom(hwaddr addr,
2120 const uint8_t *buf, int len)
2122 cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
2125 void cpu_flush_icache_range(hwaddr start, int len)
2128 * This function should do the same thing as an icache flush that was
2129 * triggered from within the guest. For TCG we are always cache coherent,
2130 * so there is no need to flush anything. For KVM / Xen we need to flush
2131 * the host's instruction cache at least.
2133 if (tcg_enabled()) {
2134 return;
2137 cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
2140 typedef struct {
2141 MemoryRegion *mr;
2142 void *buffer;
2143 hwaddr addr;
2144 hwaddr len;
2145 } BounceBuffer;
2147 static BounceBuffer bounce;
2149 typedef struct MapClient {
2150 void *opaque;
2151 void (*callback)(void *opaque);
2152 QLIST_ENTRY(MapClient) link;
2153 } MapClient;
2155 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2156 = QLIST_HEAD_INITIALIZER(map_client_list);
2158 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2160 MapClient *client = g_malloc(sizeof(*client));
2162 client->opaque = opaque;
2163 client->callback = callback;
2164 QLIST_INSERT_HEAD(&map_client_list, client, link);
2165 return client;
2168 static void cpu_unregister_map_client(void *_client)
2170 MapClient *client = (MapClient *)_client;
2172 QLIST_REMOVE(client, link);
2173 g_free(client);
2176 static void cpu_notify_map_clients(void)
2178 MapClient *client;
2180 while (!QLIST_EMPTY(&map_client_list)) {
2181 client = QLIST_FIRST(&map_client_list);
2182 client->callback(client->opaque);
2183 cpu_unregister_map_client(client);
2187 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2189 MemoryRegion *mr;
2190 hwaddr l, xlat;
2192 while (len > 0) {
2193 l = len;
2194 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2195 if (!memory_access_is_direct(mr, is_write)) {
2196 l = memory_access_size(mr, l, addr);
2197 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2198 return false;
2202 len -= l;
2203 addr += l;
2205 return true;
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1. access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len) {
                    l = access_len;
                }
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    cpu_notify_map_clients();
}
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    address_space_unmap(&address_space_memory, buffer, len, is_write,
                        access_len);
}
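/*
 * Illustrative sketch (editor's addition): a typical zero-copy access goes
 * through map/unmap, coping with the case where only part of the range (or
 * nothing, if the bounce buffer is busy) could be mapped. GUEST_BUF_ADDR
 * is hypothetical.
 *
 *     hwaddr plen = 4096;
 *     void *host = cpu_physical_memory_map(GUEST_BUF_ADDR, &plen, 0);
 *     if (host) {
 *         // read at most plen bytes starting at host ...
 *         cpu_physical_memory_unmap(host, plen, 0, plen);
 *     }
 */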
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}
uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
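/*
 * Illustrative sketch (editor's addition): code that reads a structure
 * with a fixed byte order from guest memory should use the explicit
 * variants rather than ldl_phys(), which follows the target's native
 * endianness. The address below is hypothetical.
 *
 *     uint32_t status = ldl_le_phys(ring_base + 8);   // little-endian field
 */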
/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}
uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}
/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}
uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* warning: addr must be aligned. The RAM page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (cpu_physical_memory_is_clean(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flag(addr1,
                                                   DIRTY_MEMORY_MIGRATION);
                cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
            }
        }
    }
}
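/*
 * Illustrative sketch (editor's addition): target MMU helpers can use this
 * accessor when updating accessed/dirty bits in guest page table entries,
 * so that the write itself does not flag the page as containing modified
 * code. The pte_addr/pte variables and the flag constant are hypothetical.
 *
 *     pte |= PTE_ACCESSED;               // hypothetical flag update
 *     stl_phys_notdirty(pte_addr, pte);
 */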
/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}
void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
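/*
 * Illustrative sketch (editor's addition): the stores mirror the loads
 * above; a device keeping a little-endian field in guest memory would
 * write it with the _le_ variant regardless of host or target byte order.
 * The address and constant below are hypothetical.
 *
 *     stl_le_phys(ring_base + 12, STATUS_DONE);
 */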
/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}
/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}
void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
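/*
 * Illustrative note (editor's addition): stq_phys() converts to the
 * target's native byte order via tswap64(), while stq_le_phys() and
 * stq_be_phys() pin the layout to little or big endian. A 64-bit
 * descriptor defined as little-endian would therefore be written as
 * follows (desc_addr and desc are hypothetical):
 *
 *     stq_le_phys(desc_addr, desc);
 */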
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        } else {
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
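/*
 * Illustrative sketch (editor's addition): gdbstub-style callers read
 * guest virtual memory through this helper; a debugger front end could
 * fetch a word at a guest virtual address like this (cpu and vaddr are
 * hypothetical).
 *
 *     uint32_t word;
 *     if (cpu_memory_rw_debug(cpu, vaddr, (uint8_t *)&word,
 *                             sizeof(word), 0) == 0) {
 *         // word now holds the raw bytes read from guest memory
 *     }
 */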
#endif
#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif
#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}
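/*
 * Illustrative sketch (editor's addition): dump or introspection code can
 * use this predicate to skip MMIO while walking guest-physical memory.
 * The variable names are hypothetical.
 *
 *     if (!cpu_physical_memory_is_io(paddr)) {
 *         cpu_physical_memory_read(paddr, buf, chunk);
 *     }
 */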
void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
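/*
 * Illustrative sketch (editor's addition): a caller can walk every RAM
 * block, for example to total up the registered RAM. The callback name is
 * hypothetical and its signature is assumed to match RAMBlockIterFunc as
 * it is invoked above (here assumed to return int).
 *
 *     static int sum_block(void *host, ram_addr_t offset,
 *                          ram_addr_t length, void *opaque)
 *     {
 *         size_t *total = opaque;
 *         *total += length;
 *         return 0;
 *     }
 *     ...
 *     size_t total = 0;
 *     qemu_ram_foreach_block(sum_block, &total);
 */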
#endif