configure: add Linux libnuma detection
[qemu.git] / exec.c
blobbad16e061354a5faa545a9e0b9c58df69538da99
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
55 #include "qemu/range.h"
57 //#define DEBUG_SUBPAGE
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
73 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
74 #define RAM_PREALLOC (1 << 0)
76 #endif
78 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
79 /* current CPU in the current thread. It is only valid inside
80 cpu_exec() */
81 DEFINE_TLS(CPUState *, current_cpu);
82 /* 0 = Do not count executed instructions.
83 1 = Precise instruction counting.
84 2 = Adaptive rate instruction counting. */
85 int use_icount;
87 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map; packs into 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field: "no node allocated here". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
100 /* Size of the L2 (and L3, etc) page tables. */
101 #define ADDR_SPACE_BITS 64
103 #define P_L2_BITS 9
104 #define P_L2_SIZE (1 << P_L2_BITS)
106 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
108 typedef PhysPageEntry Node[P_L2_SIZE];
110 typedef struct PhysPageMap {
111 unsigned sections_nb;
112 unsigned sections_nb_alloc;
113 unsigned nodes_nb;
114 unsigned nodes_nb_alloc;
115 Node *nodes;
116 MemoryRegionSection *sections;
117 } PhysPageMap;
119 struct AddressSpaceDispatch {
120 /* This is a multi-level map on the physical address space.
121 * The bottom level has pointers to MemoryRegionSections.
123 PhysPageEntry phys_map;
124 PhysPageMap map;
125 AddressSpace *as;
128 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
129 typedef struct subpage_t {
130 MemoryRegion iomem;
131 AddressSpace *as;
132 hwaddr base;
133 uint16_t sub_section[TARGET_PAGE_SIZE];
134 } subpage_t;
136 #define PHYS_SECTION_UNASSIGNED 0
137 #define PHYS_SECTION_NOTDIRTY 1
138 #define PHYS_SECTION_ROM 2
139 #define PHYS_SECTION_WATCH 3
141 static void io_mem_init(void);
142 static void memory_map_init(void);
143 static void tcg_commit(MemoryListener *listener);
145 static MemoryRegion io_mem_watch;
146 #endif
148 #if !defined(CONFIG_USER_ONLY)
150 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
152 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
153 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
154 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
155 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
159 static uint32_t phys_map_node_alloc(PhysPageMap *map)
161 unsigned i;
162 uint32_t ret;
164 ret = map->nodes_nb++;
165 assert(ret != PHYS_MAP_NODE_NIL);
166 assert(ret != map->nodes_nb_alloc);
167 for (i = 0; i < P_L2_SIZE; ++i) {
168 map->nodes[ret][i].skip = 1;
169 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
171 return ret;
174 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
175 hwaddr *index, hwaddr *nb, uint16_t leaf,
176 int level)
178 PhysPageEntry *p;
179 int i;
180 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
182 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
183 lp->ptr = phys_map_node_alloc(map);
184 p = map->nodes[lp->ptr];
185 if (level == 0) {
186 for (i = 0; i < P_L2_SIZE; i++) {
187 p[i].skip = 0;
188 p[i].ptr = PHYS_SECTION_UNASSIGNED;
191 } else {
192 p = map->nodes[lp->ptr];
194 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
196 while (*nb && lp < &p[P_L2_SIZE]) {
197 if ((*index & (step - 1)) == 0 && *nb >= step) {
198 lp->skip = 0;
199 lp->ptr = leaf;
200 *index += step;
201 *nb -= step;
202 } else {
203 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
205 ++lp;
209 static void phys_page_set(AddressSpaceDispatch *d,
210 hwaddr index, hwaddr nb,
211 uint16_t leaf)
213 /* Wildly overreserve - it doesn't matter much. */
214 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
216 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
219 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
220 * and update our entry so we can skip it and go directly to the destination.
222 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
224 unsigned valid_ptr = P_L2_SIZE;
225 int valid = 0;
226 PhysPageEntry *p;
227 int i;
229 if (lp->ptr == PHYS_MAP_NODE_NIL) {
230 return;
233 p = nodes[lp->ptr];
234 for (i = 0; i < P_L2_SIZE; i++) {
235 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
236 continue;
239 valid_ptr = i;
240 valid++;
241 if (p[i].skip) {
242 phys_page_compact(&p[i], nodes, compacted);
246 /* We can only compress if there's only one child. */
247 if (valid != 1) {
248 return;
251 assert(valid_ptr < P_L2_SIZE);
253 /* Don't compress if it won't fit in the # of bits we have. */
254 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
255 return;
258 lp->ptr = p[valid_ptr].ptr;
259 if (!p[valid_ptr].skip) {
260 /* If our only child is a leaf, make this a leaf. */
261 /* By design, we should have made this node a leaf to begin with so we
262 * should never reach here.
263 * But since it's so simple to handle this, let's do it just in case we
264 * change this rule.
266 lp->skip = 0;
267 } else {
268 lp->skip += p[valid_ptr].skip;
272 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
274 DECLARE_BITMAP(compacted, nodes_nb);
276 if (d->phys_map.skip) {
277 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
281 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
282 Node *nodes, MemoryRegionSection *sections)
284 PhysPageEntry *p;
285 hwaddr index = addr >> TARGET_PAGE_BITS;
286 int i;
288 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
289 if (lp.ptr == PHYS_MAP_NODE_NIL) {
290 return &sections[PHYS_SECTION_UNASSIGNED];
292 p = nodes[lp.ptr];
293 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
296 if (sections[lp.ptr].size.hi ||
297 range_covers_byte(sections[lp.ptr].offset_within_address_space,
298 sections[lp.ptr].size.lo, addr)) {
299 return &sections[lp.ptr];
300 } else {
301 return &sections[PHYS_SECTION_UNASSIGNED];
305 bool memory_region_is_unassigned(MemoryRegion *mr)
307 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
308 && mr != &io_mem_watch;
311 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
312 hwaddr addr,
313 bool resolve_subpage)
315 MemoryRegionSection *section;
316 subpage_t *subpage;
318 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
319 if (resolve_subpage && section->mr->subpage) {
320 subpage = container_of(section->mr, subpage_t, iomem);
321 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
323 return section;
326 static MemoryRegionSection *
327 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
328 hwaddr *plen, bool resolve_subpage)
330 MemoryRegionSection *section;
331 Int128 diff;
333 section = address_space_lookup_region(d, addr, resolve_subpage);
334 /* Compute offset within MemoryRegionSection */
335 addr -= section->offset_within_address_space;
337 /* Compute offset within MemoryRegion */
338 *xlat = addr + section->offset_within_region;
340 diff = int128_sub(section->mr->size, int128_make64(addr));
341 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
342 return section;
345 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
347 if (memory_region_is_ram(mr)) {
348 return !(is_write && mr->readonly);
350 if (memory_region_is_romd(mr)) {
351 return !is_write;
354 return false;
357 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
358 hwaddr *xlat, hwaddr *plen,
359 bool is_write)
361 IOMMUTLBEntry iotlb;
362 MemoryRegionSection *section;
363 MemoryRegion *mr;
364 hwaddr len = *plen;
366 for (;;) {
367 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
368 mr = section->mr;
370 if (!mr->iommu_ops) {
371 break;
374 iotlb = mr->iommu_ops->translate(mr, addr);
375 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
376 | (addr & iotlb.addr_mask));
377 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
378 if (!(iotlb.perm & (1 << is_write))) {
379 mr = &io_mem_unassigned;
380 break;
383 as = iotlb.target_as;
386 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
387 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
388 len = MIN(page, len);
391 *plen = len;
392 *xlat = addr;
393 return mr;
396 MemoryRegionSection *
397 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
398 hwaddr *plen)
400 MemoryRegionSection *section;
401 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
403 assert(!section->mr->iommu_ops);
404 return section;
406 #endif
408 void cpu_exec_init_all(void)
410 #if !defined(CONFIG_USER_ONLY)
411 qemu_mutex_init(&ram_list.mutex);
412 memory_map_init();
413 io_mem_init();
414 #endif
417 #if !defined(CONFIG_USER_ONLY)
419 static int cpu_common_post_load(void *opaque, int version_id)
421 CPUState *cpu = opaque;
423 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
424 version_id is increased. */
425 cpu->interrupt_request &= ~0x01;
426 tlb_flush(cpu, 1);
428 return 0;
431 const VMStateDescription vmstate_cpu_common = {
432 .name = "cpu_common",
433 .version_id = 1,
434 .minimum_version_id = 1,
435 .post_load = cpu_common_post_load,
436 .fields = (VMStateField[]) {
437 VMSTATE_UINT32(halted, CPUState),
438 VMSTATE_UINT32(interrupt_request, CPUState),
439 VMSTATE_END_OF_LIST()
443 #endif
445 CPUState *qemu_get_cpu(int index)
447 CPUState *cpu;
449 CPU_FOREACH(cpu) {
450 if (cpu->cpu_index == index) {
451 return cpu;
455 return NULL;
458 #if !defined(CONFIG_USER_ONLY)
459 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
461 /* We only support one address space per cpu at the moment. */
462 assert(cpu->as == as);
464 if (cpu->tcg_as_listener) {
465 memory_listener_unregister(cpu->tcg_as_listener);
466 } else {
467 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
469 cpu->tcg_as_listener->commit = tcg_commit;
470 memory_listener_register(cpu->tcg_as_listener, as);
472 #endif
474 void cpu_exec_init(CPUArchState *env)
476 CPUState *cpu = ENV_GET_CPU(env);
477 CPUClass *cc = CPU_GET_CLASS(cpu);
478 CPUState *some_cpu;
479 int cpu_index;
481 #if defined(CONFIG_USER_ONLY)
482 cpu_list_lock();
483 #endif
484 cpu_index = 0;
485 CPU_FOREACH(some_cpu) {
486 cpu_index++;
488 cpu->cpu_index = cpu_index;
489 cpu->numa_node = 0;
490 QTAILQ_INIT(&cpu->breakpoints);
491 QTAILQ_INIT(&cpu->watchpoints);
492 #ifndef CONFIG_USER_ONLY
493 cpu->as = &address_space_memory;
494 cpu->thread_id = qemu_get_thread_id();
495 #endif
496 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
497 #if defined(CONFIG_USER_ONLY)
498 cpu_list_unlock();
499 #endif
500 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
501 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
503 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
504 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
505 cpu_save, cpu_load, env);
506 assert(cc->vmsd == NULL);
507 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
508 #endif
509 if (cc->vmsd != NULL) {
510 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate translated code covering the one byte at @pc. */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Invalidate translated code at the physical address behind @pc.
 * Nothing is done when the debug lookup returns -1.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr page = cpu_get_phys_page_debug(cpu, pc);

    if (page == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as, page | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
532 #if defined(CONFIG_USER_ONLY)
533 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
538 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
539 int flags, CPUWatchpoint **watchpoint)
541 return -ENOSYS;
543 #else
544 /* Add a watchpoint. */
545 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
546 int flags, CPUWatchpoint **watchpoint)
548 vaddr len_mask = ~(len - 1);
549 CPUWatchpoint *wp;
551 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
552 if ((len & (len - 1)) || (addr & ~len_mask) ||
553 len == 0 || len > TARGET_PAGE_SIZE) {
554 error_report("tried to set invalid watchpoint at %"
555 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
556 return -EINVAL;
558 wp = g_malloc(sizeof(*wp));
560 wp->vaddr = addr;
561 wp->len_mask = len_mask;
562 wp->flags = flags;
564 /* keep all GDB-injected watchpoints in front */
565 if (flags & BP_GDB) {
566 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
567 } else {
568 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
571 tlb_flush_page(cpu, addr);
573 if (watchpoint)
574 *watchpoint = wp;
575 return 0;
578 /* Remove a specific watchpoint. */
579 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
580 int flags)
582 vaddr len_mask = ~(len - 1);
583 CPUWatchpoint *wp;
585 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
586 if (addr == wp->vaddr && len_mask == wp->len_mask
587 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
588 cpu_watchpoint_remove_by_ref(cpu, wp);
589 return 0;
592 return -ENOENT;
595 /* Remove a specific watchpoint by reference. */
596 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
598 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
600 tlb_flush_page(cpu, watchpoint->vaddr);
602 g_free(watchpoint);
605 /* Remove all matching watchpoints. */
606 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
608 CPUWatchpoint *wp, *next;
610 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
611 if (wp->flags & mask) {
612 cpu_watchpoint_remove_by_ref(cpu, wp);
616 #endif
618 /* Add a breakpoint. */
619 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
620 CPUBreakpoint **breakpoint)
622 #if defined(TARGET_HAS_ICE)
623 CPUBreakpoint *bp;
625 bp = g_malloc(sizeof(*bp));
627 bp->pc = pc;
628 bp->flags = flags;
630 /* keep all GDB-injected breakpoints in front */
631 if (flags & BP_GDB) {
632 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
633 } else {
634 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
637 breakpoint_invalidate(cpu, pc);
639 if (breakpoint) {
640 *breakpoint = bp;
642 return 0;
643 #else
644 return -ENOSYS;
645 #endif
648 /* Remove a specific breakpoint. */
649 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
651 #if defined(TARGET_HAS_ICE)
652 CPUBreakpoint *bp;
654 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
655 if (bp->pc == pc && bp->flags == flags) {
656 cpu_breakpoint_remove_by_ref(cpu, bp);
657 return 0;
660 return -ENOENT;
661 #else
662 return -ENOSYS;
663 #endif
666 /* Remove a specific breakpoint by reference. */
667 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
669 #if defined(TARGET_HAS_ICE)
670 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
672 breakpoint_invalidate(cpu, breakpoint->pc);
674 g_free(breakpoint);
675 #endif
678 /* Remove all matching breakpoints. */
679 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
681 #if defined(TARGET_HAS_ICE)
682 CPUBreakpoint *bp, *next;
684 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
685 if (bp->flags & mask) {
686 cpu_breakpoint_remove_by_ref(cpu, bp);
689 #endif
692 /* enable or disable single step mode. EXCP_DEBUG is returned by the
693 CPU loop after each instruction */
694 void cpu_single_step(CPUState *cpu, int enabled)
696 #if defined(TARGET_HAS_ICE)
697 if (cpu->singlestep_enabled != enabled) {
698 cpu->singlestep_enabled = enabled;
699 if (kvm_enabled()) {
700 kvm_update_guest_debug(cpu, 0);
701 } else {
702 /* must flush all the translated code to avoid inconsistencies */
703 /* XXX: only flush what is necessary */
704 CPUArchState *env = cpu->env_ptr;
705 tb_flush(env);
708 #endif
711 void cpu_abort(CPUState *cpu, const char *fmt, ...)
713 va_list ap;
714 va_list ap2;
716 va_start(ap, fmt);
717 va_copy(ap2, ap);
718 fprintf(stderr, "qemu: fatal: ");
719 vfprintf(stderr, fmt, ap);
720 fprintf(stderr, "\n");
721 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
722 if (qemu_log_enabled()) {
723 qemu_log("qemu: fatal: ");
724 qemu_log_vprintf(fmt, ap2);
725 qemu_log("\n");
726 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
727 qemu_log_flush();
728 qemu_log_close();
730 va_end(ap2);
731 va_end(ap);
732 #if defined(CONFIG_USER_ONLY)
734 struct sigaction act;
735 sigfillset(&act.sa_mask);
736 act.sa_handler = SIG_DFL;
737 sigaction(SIGABRT, &act, NULL);
739 #endif
740 abort();
743 #if !defined(CONFIG_USER_ONLY)
744 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
746 RAMBlock *block;
748 /* The list is protected by the iothread lock here. */
749 block = ram_list.mru_block;
750 if (block && addr - block->offset < block->length) {
751 goto found;
753 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
754 if (addr - block->offset < block->length) {
755 goto found;
759 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
760 abort();
762 found:
763 ram_list.mru_block = block;
764 return block;
767 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
769 ram_addr_t start1;
770 RAMBlock *block;
771 ram_addr_t end;
773 end = TARGET_PAGE_ALIGN(start + length);
774 start &= TARGET_PAGE_MASK;
776 block = qemu_get_ram_block(start);
777 assert(block == qemu_get_ram_block(end - 1));
778 start1 = (uintptr_t)block->host + (start - block->offset);
779 cpu_tlb_reset_dirty_all(start1, length);
782 /* Note: start and end must be within the same ram block. */
783 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
784 unsigned client)
786 if (length == 0)
787 return;
788 cpu_physical_memory_clear_dirty_range(start, length, client);
790 if (tcg_enabled()) {
791 tlb_reset_dirty_range_all(start, length);
795 static void cpu_physical_memory_set_dirty_tracking(bool enable)
797 in_migration = enable;
800 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
801 MemoryRegionSection *section,
802 target_ulong vaddr,
803 hwaddr paddr, hwaddr xlat,
804 int prot,
805 target_ulong *address)
807 hwaddr iotlb;
808 CPUWatchpoint *wp;
810 if (memory_region_is_ram(section->mr)) {
811 /* Normal RAM. */
812 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
813 + xlat;
814 if (!section->readonly) {
815 iotlb |= PHYS_SECTION_NOTDIRTY;
816 } else {
817 iotlb |= PHYS_SECTION_ROM;
819 } else {
820 iotlb = section - section->address_space->dispatch->map.sections;
821 iotlb += xlat;
824 /* Make accesses to pages with watchpoints go via the
825 watchpoint trap routines. */
826 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
827 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
828 /* Avoid trapping reads of pages with a write breakpoint. */
829 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
830 iotlb = PHYS_SECTION_WATCH + paddr;
831 *address |= TLB_MMIO;
832 break;
837 return iotlb;
839 #endif /* defined(CONFIG_USER_ONLY) */
841 #if !defined(CONFIG_USER_ONLY)
843 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
844 uint16_t section);
845 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
847 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
850 * Set a custom physical guest memory alloator.
851 * Accelerators with unusual needs may need this. Hopefully, we can
852 * get rid of it eventually.
854 void phys_mem_set_alloc(void *(*alloc)(size_t))
856 phys_mem_alloc = alloc;
859 static uint16_t phys_section_add(PhysPageMap *map,
860 MemoryRegionSection *section)
862 /* The physical section number is ORed with a page-aligned
863 * pointer to produce the iotlb entries. Thus it should
864 * never overflow into the page-aligned value.
866 assert(map->sections_nb < TARGET_PAGE_SIZE);
868 if (map->sections_nb == map->sections_nb_alloc) {
869 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
870 map->sections = g_renew(MemoryRegionSection, map->sections,
871 map->sections_nb_alloc);
873 map->sections[map->sections_nb] = *section;
874 memory_region_ref(section->mr);
875 return map->sections_nb++;
878 static void phys_section_destroy(MemoryRegion *mr)
880 memory_region_unref(mr);
882 if (mr->subpage) {
883 subpage_t *subpage = container_of(mr, subpage_t, iomem);
884 memory_region_destroy(&subpage->iomem);
885 g_free(subpage);
889 static void phys_sections_free(PhysPageMap *map)
891 while (map->sections_nb > 0) {
892 MemoryRegionSection *section = &map->sections[--map->sections_nb];
893 phys_section_destroy(section->mr);
895 g_free(map->sections);
896 g_free(map->nodes);
899 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
901 subpage_t *subpage;
902 hwaddr base = section->offset_within_address_space
903 & TARGET_PAGE_MASK;
904 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
905 d->map.nodes, d->map.sections);
906 MemoryRegionSection subsection = {
907 .offset_within_address_space = base,
908 .size = int128_make64(TARGET_PAGE_SIZE),
910 hwaddr start, end;
912 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
914 if (!(existing->mr->subpage)) {
915 subpage = subpage_init(d->as, base);
916 subsection.address_space = d->as;
917 subsection.mr = &subpage->iomem;
918 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
919 phys_section_add(&d->map, &subsection));
920 } else {
921 subpage = container_of(existing->mr, subpage_t, iomem);
923 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
924 end = start + int128_get64(section->size) - 1;
925 subpage_register(subpage, start, end,
926 phys_section_add(&d->map, section));
930 static void register_multipage(AddressSpaceDispatch *d,
931 MemoryRegionSection *section)
933 hwaddr start_addr = section->offset_within_address_space;
934 uint16_t section_index = phys_section_add(&d->map, section);
935 uint64_t num_pages = int128_get64(int128_rshift(section->size,
936 TARGET_PAGE_BITS));
938 assert(num_pages);
939 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
942 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
944 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
945 AddressSpaceDispatch *d = as->next_dispatch;
946 MemoryRegionSection now = *section, remain = *section;
947 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
949 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
950 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
951 - now.offset_within_address_space;
953 now.size = int128_min(int128_make64(left), now.size);
954 register_subpage(d, &now);
955 } else {
956 now.size = int128_zero();
958 while (int128_ne(remain.size, now.size)) {
959 remain.size = int128_sub(remain.size, now.size);
960 remain.offset_within_address_space += int128_get64(now.size);
961 remain.offset_within_region += int128_get64(now.size);
962 now = remain;
963 if (int128_lt(remain.size, page_size)) {
964 register_subpage(d, &now);
965 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
966 now.size = page_size;
967 register_subpage(d, &now);
968 } else {
969 now.size = int128_and(now.size, int128_neg(page_size));
970 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; a no-op when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
981 void qemu_mutex_lock_ramlist(void)
983 qemu_mutex_lock(&ram_list.mutex);
986 void qemu_mutex_unlock_ramlist(void)
988 qemu_mutex_unlock(&ram_list.mutex);
991 #ifdef __linux__
993 #include <sys/vfs.h>
995 #define HUGETLBFS_MAGIC 0x958458f6
997 static long gethugepagesize(const char *path)
999 struct statfs fs;
1000 int ret;
1002 do {
1003 ret = statfs(path, &fs);
1004 } while (ret != 0 && errno == EINTR);
1006 if (ret != 0) {
1007 perror(path);
1008 return 0;
1011 if (fs.f_type != HUGETLBFS_MAGIC)
1012 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1014 return fs.f_bsize;
1017 static void *file_ram_alloc(RAMBlock *block,
1018 ram_addr_t memory,
1019 const char *path)
1021 char *filename;
1022 char *sanitized_name;
1023 char *c;
1024 void *area;
1025 int fd;
1026 unsigned long hpagesize;
1028 hpagesize = gethugepagesize(path);
1029 if (!hpagesize) {
1030 goto error;
1033 if (memory < hpagesize) {
1034 return NULL;
1037 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1038 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1039 goto error;
1042 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1043 sanitized_name = g_strdup(block->mr->name);
1044 for (c = sanitized_name; *c != '\0'; c++) {
1045 if (*c == '/')
1046 *c = '_';
1049 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1050 sanitized_name);
1051 g_free(sanitized_name);
1053 fd = mkstemp(filename);
1054 if (fd < 0) {
1055 perror("unable to create backing store for hugepages");
1056 g_free(filename);
1057 goto error;
1059 unlink(filename);
1060 g_free(filename);
1062 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1065 * ftruncate is not supported by hugetlbfs in older
1066 * hosts, so don't bother bailing out on errors.
1067 * If anything goes wrong with it under other filesystems,
1068 * mmap will fail.
1070 if (ftruncate(fd, memory))
1071 perror("ftruncate");
1073 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1074 if (area == MAP_FAILED) {
1075 perror("file_ram_alloc: can't mmap RAM pages");
1076 close(fd);
1077 goto error;
1080 if (mem_prealloc) {
1081 os_mem_prealloc(fd, area, memory);
1084 block->fd = fd;
1085 return area;
1087 error:
1088 if (mem_prealloc) {
1089 exit(1);
1091 return NULL;
1093 #else
1094 static void *file_ram_alloc(RAMBlock *block,
1095 ram_addr_t memory,
1096 const char *path)
1098 fprintf(stderr, "-mem-path not supported on this host\n");
1099 exit(1);
1101 #endif
1103 static ram_addr_t find_ram_offset(ram_addr_t size)
1105 RAMBlock *block, *next_block;
1106 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1108 assert(size != 0); /* it would hand out same offset multiple times */
1110 if (QTAILQ_EMPTY(&ram_list.blocks))
1111 return 0;
1113 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1114 ram_addr_t end, next = RAM_ADDR_MAX;
1116 end = block->offset + block->length;
1118 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1119 if (next_block->offset >= end) {
1120 next = MIN(next, next_block->offset);
1123 if (next - end >= size && next - end < mingap) {
1124 offset = end;
1125 mingap = next - end;
1129 if (offset == RAM_ADDR_MAX) {
1130 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1131 (uint64_t)size);
1132 abort();
1135 return offset;
1138 ram_addr_t last_ram_offset(void)
1140 RAMBlock *block;
1141 ram_addr_t last = 0;
1143 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1144 last = MAX(last, block->offset + block->length);
1146 return last;
1149 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1151 int ret;
1153 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1154 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1155 "dump-guest-core", true)) {
1156 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1157 if (ret) {
1158 perror("qemu_madvise");
1159 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1160 "but dump_guest_core=off specified\n");
1165 static RAMBlock *find_ram_block(ram_addr_t addr)
1167 RAMBlock *block;
1169 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1170 if (block->offset == addr) {
1171 return block;
1175 return NULL;
1178 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1180 RAMBlock *new_block = find_ram_block(addr);
1181 RAMBlock *block;
1183 assert(new_block);
1184 assert(!new_block->idstr[0]);
1186 if (dev) {
1187 char *id = qdev_get_dev_path(dev);
1188 if (id) {
1189 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1190 g_free(id);
1193 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1195 /* This assumes the iothread lock is taken here too. */
1196 qemu_mutex_lock_ramlist();
1197 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1198 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1199 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1200 new_block->idstr);
1201 abort();
1204 qemu_mutex_unlock_ramlist();
1207 void qemu_ram_unset_idstr(ram_addr_t addr)
1209 RAMBlock *block = find_ram_block(addr);
1211 if (block) {
1212 memset(block->idstr, 0, sizeof(block->idstr));
1216 static int memory_try_enable_merging(void *addr, size_t len)
1218 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1219 /* disabled by the user */
1220 return 0;
1223 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1226 static ram_addr_t ram_block_add(RAMBlock *new_block)
1228 RAMBlock *block;
1229 ram_addr_t old_ram_size, new_ram_size;
1231 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1233 /* This assumes the iothread lock is taken here too. */
1234 qemu_mutex_lock_ramlist();
1235 new_block->offset = find_ram_offset(new_block->length);
1237 if (!new_block->host) {
1238 if (xen_enabled()) {
1239 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1240 } else {
1241 new_block->host = phys_mem_alloc(new_block->length);
1242 if (!new_block->host) {
1243 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1244 new_block->mr->name, strerror(errno));
1245 exit(1);
1247 memory_try_enable_merging(new_block->host, new_block->length);
1251 /* Keep the list sorted from biggest to smallest block. */
1252 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1253 if (block->length < new_block->length) {
1254 break;
1257 if (block) {
1258 QTAILQ_INSERT_BEFORE(block, new_block, next);
1259 } else {
1260 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1262 ram_list.mru_block = NULL;
1264 ram_list.version++;
1265 qemu_mutex_unlock_ramlist();
1267 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1269 if (new_ram_size > old_ram_size) {
1270 int i;
1271 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1272 ram_list.dirty_memory[i] =
1273 bitmap_zero_extend(ram_list.dirty_memory[i],
1274 old_ram_size, new_ram_size);
1277 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1279 qemu_ram_setup_dump(new_block->host, new_block->length);
1280 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1281 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1283 if (kvm_enabled()) {
1284 kvm_setup_guest_memory(new_block->host, new_block->length);
1287 return new_block->offset;
1290 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1291 const char *mem_path)
1293 RAMBlock *new_block;
1295 if (xen_enabled()) {
1296 fprintf(stderr, "-mem-path not supported with Xen\n");
1297 exit(1);
1300 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1302 * file_ram_alloc() needs to allocate just like
1303 * phys_mem_alloc, but we haven't bothered to provide
1304 * a hook there.
1306 fprintf(stderr,
1307 "-mem-path not supported with this accelerator\n");
1308 exit(1);
1311 size = TARGET_PAGE_ALIGN(size);
1312 new_block = g_malloc0(sizeof(*new_block));
1313 new_block->mr = mr;
1314 new_block->length = size;
1315 new_block->host = file_ram_alloc(new_block, size, mem_path);
1316 return ram_block_add(new_block);
1319 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1320 MemoryRegion *mr)
1322 RAMBlock *new_block;
1324 size = TARGET_PAGE_ALIGN(size);
1325 new_block = g_malloc0(sizeof(*new_block));
1326 new_block->mr = mr;
1327 new_block->length = size;
1328 new_block->fd = -1;
1329 new_block->host = host;
1330 if (host) {
1331 new_block->flags |= RAM_PREALLOC;
1333 return ram_block_add(new_block);
1336 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1338 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1341 void qemu_ram_free_from_ptr(ram_addr_t addr)
1343 RAMBlock *block;
1345 /* This assumes the iothread lock is taken here too. */
1346 qemu_mutex_lock_ramlist();
1347 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1348 if (addr == block->offset) {
1349 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1350 ram_list.mru_block = NULL;
1351 ram_list.version++;
1352 g_free(block);
1353 break;
1356 qemu_mutex_unlock_ramlist();
1359 void qemu_ram_free(ram_addr_t addr)
1361 RAMBlock *block;
1363 /* This assumes the iothread lock is taken here too. */
1364 qemu_mutex_lock_ramlist();
1365 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1366 if (addr == block->offset) {
1367 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1368 ram_list.mru_block = NULL;
1369 ram_list.version++;
1370 if (block->flags & RAM_PREALLOC) {
1372 } else if (xen_enabled()) {
1373 xen_invalidate_map_cache_entry(block->host);
1374 #ifndef _WIN32
1375 } else if (block->fd >= 0) {
1376 munmap(block->host, block->length);
1377 close(block->fd);
1378 #endif
1379 } else {
1380 qemu_anon_ram_free(block->host, block->length);
1382 g_free(block);
1383 break;
1386 qemu_mutex_unlock_ramlist();
1390 #ifndef _WIN32
1391 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1393 RAMBlock *block;
1394 ram_addr_t offset;
1395 int flags;
1396 void *area, *vaddr;
1398 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1399 offset = addr - block->offset;
1400 if (offset < block->length) {
1401 vaddr = block->host + offset;
1402 if (block->flags & RAM_PREALLOC) {
1404 } else if (xen_enabled()) {
1405 abort();
1406 } else {
1407 flags = MAP_FIXED;
1408 munmap(vaddr, length);
1409 if (block->fd >= 0) {
1410 #ifdef MAP_POPULATE
1411 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1412 MAP_PRIVATE;
1413 #else
1414 flags |= MAP_PRIVATE;
1415 #endif
1416 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1417 flags, block->fd, offset);
1418 } else {
1420 * Remap needs to match alloc. Accelerators that
1421 * set phys_mem_alloc never remap. If they did,
1422 * we'd need a remap hook here.
1424 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1426 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1427 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1428 flags, -1, 0);
1430 if (area != vaddr) {
1431 fprintf(stderr, "Could not remap addr: "
1432 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1433 length, addr);
1434 exit(1);
1436 memory_try_enable_merging(vaddr, length);
1437 qemu_ram_setup_dump(vaddr, length);
1439 return;
1443 #endif /* !_WIN32 */
1445 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1446 With the exception of the softmmu code in this file, this should
1447 only be used for local memory (e.g. video ram) that the device owns,
1448 and knows it isn't going to access beyond the end of the block.
1450 It should not be used for general purpose DMA.
1451 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1453 void *qemu_get_ram_ptr(ram_addr_t addr)
1455 RAMBlock *block = qemu_get_ram_block(addr);
1457 if (xen_enabled()) {
1458 /* We need to check if the requested address is in the RAM
1459 * because we don't want to map the entire memory in QEMU.
1460 * In that case just map until the end of the page.
1462 if (block->offset == 0) {
1463 return xen_map_cache(addr, 0, 0);
1464 } else if (block->host == NULL) {
1465 block->host =
1466 xen_map_cache(block->offset, block->length, 1);
1469 return block->host + (addr - block->offset);
1472 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1473 * but takes a size argument */
1474 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1476 if (*size == 0) {
1477 return NULL;
1479 if (xen_enabled()) {
1480 return xen_map_cache(addr, *size, 1);
1481 } else {
1482 RAMBlock *block;
1484 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1485 if (addr - block->offset < block->length) {
1486 if (addr - block->offset + *size > block->length)
1487 *size = block->length - addr + block->offset;
1488 return block->host + (addr - block->offset);
1492 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1493 abort();
1497 /* Some of the softmmu routines need to translate from a host pointer
1498 (typically a TLB entry) back to a ram offset. */
1499 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1501 RAMBlock *block;
1502 uint8_t *host = ptr;
1504 if (xen_enabled()) {
1505 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1506 return qemu_get_ram_block(*ram_addr)->mr;
1509 block = ram_list.mru_block;
1510 if (block && block->host && host - block->host < block->length) {
1511 goto found;
1514 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1515 /* This case append when the block is not mapped. */
1516 if (block->host == NULL) {
1517 continue;
1519 if (host - block->host < block->length) {
1520 goto found;
1524 return NULL;
1526 found:
1527 *ram_addr = block->offset + (host - block->host);
1528 return block->mr;
1531 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1532 uint64_t val, unsigned size)
1534 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1535 tb_invalidate_phys_page_fast(ram_addr, size);
1537 switch (size) {
1538 case 1:
1539 stb_p(qemu_get_ram_ptr(ram_addr), val);
1540 break;
1541 case 2:
1542 stw_p(qemu_get_ram_ptr(ram_addr), val);
1543 break;
1544 case 4:
1545 stl_p(qemu_get_ram_ptr(ram_addr), val);
1546 break;
1547 default:
1548 abort();
1550 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1551 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1552 /* we remove the notdirty callback only if the code has been
1553 flushed */
1554 if (!cpu_physical_memory_is_clean(ram_addr)) {
1555 CPUArchState *env = current_cpu->env_ptr;
1556 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1560 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1561 unsigned size, bool is_write)
1563 return is_write;
1566 static const MemoryRegionOps notdirty_mem_ops = {
1567 .write = notdirty_mem_write,
1568 .valid.accepts = notdirty_mem_accepts,
1569 .endianness = DEVICE_NATIVE_ENDIAN,
1572 /* Generate a debug exception if a watchpoint has been hit. */
1573 static void check_watchpoint(int offset, int len_mask, int flags)
1575 CPUState *cpu = current_cpu;
1576 CPUArchState *env = cpu->env_ptr;
1577 target_ulong pc, cs_base;
1578 target_ulong vaddr;
1579 CPUWatchpoint *wp;
1580 int cpu_flags;
1582 if (cpu->watchpoint_hit) {
1583 /* We re-entered the check after replacing the TB. Now raise
1584 * the debug interrupt so that is will trigger after the
1585 * current instruction. */
1586 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1587 return;
1589 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1590 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1591 if ((vaddr == (wp->vaddr & len_mask) ||
1592 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1593 wp->flags |= BP_WATCHPOINT_HIT;
1594 if (!cpu->watchpoint_hit) {
1595 cpu->watchpoint_hit = wp;
1596 tb_check_watchpoint(cpu);
1597 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1598 cpu->exception_index = EXCP_DEBUG;
1599 cpu_loop_exit(cpu);
1600 } else {
1601 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1602 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1603 cpu_resume_from_signal(cpu, NULL);
1606 } else {
1607 wp->flags &= ~BP_WATCHPOINT_HIT;
1612 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1613 so these check for a hit then pass through to the normal out-of-line
1614 phys routines. */
1615 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1616 unsigned size)
1618 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1619 switch (size) {
1620 case 1: return ldub_phys(&address_space_memory, addr);
1621 case 2: return lduw_phys(&address_space_memory, addr);
1622 case 4: return ldl_phys(&address_space_memory, addr);
1623 default: abort();
1627 static void watch_mem_write(void *opaque, hwaddr addr,
1628 uint64_t val, unsigned size)
1630 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1631 switch (size) {
1632 case 1:
1633 stb_phys(&address_space_memory, addr, val);
1634 break;
1635 case 2:
1636 stw_phys(&address_space_memory, addr, val);
1637 break;
1638 case 4:
1639 stl_phys(&address_space_memory, addr, val);
1640 break;
1641 default: abort();
1645 static const MemoryRegionOps watch_mem_ops = {
1646 .read = watch_mem_read,
1647 .write = watch_mem_write,
1648 .endianness = DEVICE_NATIVE_ENDIAN,
1651 static uint64_t subpage_read(void *opaque, hwaddr addr,
1652 unsigned len)
1654 subpage_t *subpage = opaque;
1655 uint8_t buf[4];
1657 #if defined(DEBUG_SUBPAGE)
1658 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1659 subpage, len, addr);
1660 #endif
1661 address_space_read(subpage->as, addr + subpage->base, buf, len);
1662 switch (len) {
1663 case 1:
1664 return ldub_p(buf);
1665 case 2:
1666 return lduw_p(buf);
1667 case 4:
1668 return ldl_p(buf);
1669 default:
1670 abort();
1674 static void subpage_write(void *opaque, hwaddr addr,
1675 uint64_t value, unsigned len)
1677 subpage_t *subpage = opaque;
1678 uint8_t buf[4];
1680 #if defined(DEBUG_SUBPAGE)
1681 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1682 " value %"PRIx64"\n",
1683 __func__, subpage, len, addr, value);
1684 #endif
1685 switch (len) {
1686 case 1:
1687 stb_p(buf, value);
1688 break;
1689 case 2:
1690 stw_p(buf, value);
1691 break;
1692 case 4:
1693 stl_p(buf, value);
1694 break;
1695 default:
1696 abort();
1698 address_space_write(subpage->as, addr + subpage->base, buf, len);
1701 static bool subpage_accepts(void *opaque, hwaddr addr,
1702 unsigned len, bool is_write)
1704 subpage_t *subpage = opaque;
1705 #if defined(DEBUG_SUBPAGE)
1706 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1707 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1708 #endif
1710 return address_space_access_valid(subpage->as, addr + subpage->base,
1711 len, is_write);
1714 static const MemoryRegionOps subpage_ops = {
1715 .read = subpage_read,
1716 .write = subpage_write,
1717 .valid.accepts = subpage_accepts,
1718 .endianness = DEVICE_NATIVE_ENDIAN,
1721 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1722 uint16_t section)
1724 int idx, eidx;
1726 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1727 return -1;
1728 idx = SUBPAGE_IDX(start);
1729 eidx = SUBPAGE_IDX(end);
1730 #if defined(DEBUG_SUBPAGE)
1731 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1732 __func__, mmio, start, end, idx, eidx, section);
1733 #endif
1734 for (; idx <= eidx; idx++) {
1735 mmio->sub_section[idx] = section;
1738 return 0;
1741 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1743 subpage_t *mmio;
1745 mmio = g_malloc0(sizeof(subpage_t));
1747 mmio->as = as;
1748 mmio->base = base;
1749 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1750 "subpage", TARGET_PAGE_SIZE);
1751 mmio->iomem.subpage = true;
1752 #if defined(DEBUG_SUBPAGE)
1753 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1754 mmio, base, TARGET_PAGE_SIZE);
1755 #endif
1756 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1758 return mmio;
1761 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1762 MemoryRegion *mr)
1764 assert(as);
1765 MemoryRegionSection section = {
1766 .address_space = as,
1767 .mr = mr,
1768 .offset_within_address_space = 0,
1769 .offset_within_region = 0,
1770 .size = int128_2_64(),
1773 return phys_section_add(map, &section);
1776 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1778 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1781 static void io_mem_init(void)
1783 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1784 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1785 "unassigned", UINT64_MAX);
1786 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1787 "notdirty", UINT64_MAX);
1788 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1789 "watch", UINT64_MAX);
1792 static void mem_begin(MemoryListener *listener)
1794 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1795 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1796 uint16_t n;
1798 n = dummy_section(&d->map, as, &io_mem_unassigned);
1799 assert(n == PHYS_SECTION_UNASSIGNED);
1800 n = dummy_section(&d->map, as, &io_mem_notdirty);
1801 assert(n == PHYS_SECTION_NOTDIRTY);
1802 n = dummy_section(&d->map, as, &io_mem_rom);
1803 assert(n == PHYS_SECTION_ROM);
1804 n = dummy_section(&d->map, as, &io_mem_watch);
1805 assert(n == PHYS_SECTION_WATCH);
1807 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1808 d->as = as;
1809 as->next_dispatch = d;
1812 static void mem_commit(MemoryListener *listener)
1814 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1815 AddressSpaceDispatch *cur = as->dispatch;
1816 AddressSpaceDispatch *next = as->next_dispatch;
1818 phys_page_compact_all(next, next->map.nodes_nb);
1820 as->dispatch = next;
1822 if (cur) {
1823 phys_sections_free(&cur->map);
1824 g_free(cur);
1828 static void tcg_commit(MemoryListener *listener)
1830 CPUState *cpu;
1832 /* since each CPU stores ram addresses in its TLB cache, we must
1833 reset the modified entries */
1834 /* XXX: slow ! */
1835 CPU_FOREACH(cpu) {
1836 /* FIXME: Disentangle the cpu.h circular files deps so we can
1837 directly get the right CPU from listener. */
1838 if (cpu->tcg_as_listener != listener) {
1839 continue;
1841 tlb_flush(cpu, 1);
1845 static void core_log_global_start(MemoryListener *listener)
1847 cpu_physical_memory_set_dirty_tracking(true);
1850 static void core_log_global_stop(MemoryListener *listener)
1852 cpu_physical_memory_set_dirty_tracking(false);
1855 static MemoryListener core_memory_listener = {
1856 .log_global_start = core_log_global_start,
1857 .log_global_stop = core_log_global_stop,
1858 .priority = 1,
1861 void address_space_init_dispatch(AddressSpace *as)
1863 as->dispatch = NULL;
1864 as->dispatch_listener = (MemoryListener) {
1865 .begin = mem_begin,
1866 .commit = mem_commit,
1867 .region_add = mem_add,
1868 .region_nop = mem_add,
1869 .priority = 0,
1871 memory_listener_register(&as->dispatch_listener, as);
1874 void address_space_destroy_dispatch(AddressSpace *as)
1876 AddressSpaceDispatch *d = as->dispatch;
1878 memory_listener_unregister(&as->dispatch_listener);
1879 g_free(d);
1880 as->dispatch = NULL;
1883 static void memory_map_init(void)
1885 system_memory = g_malloc(sizeof(*system_memory));
1887 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1888 address_space_init(&address_space_memory, system_memory, "memory");
1890 system_io = g_malloc(sizeof(*system_io));
1891 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1892 65536);
1893 address_space_init(&address_space_io, system_io, "I/O");
1895 memory_listener_register(&core_memory_listener, &address_space_memory);
1898 MemoryRegion *get_system_memory(void)
1900 return system_memory;
1903 MemoryRegion *get_system_io(void)
1905 return system_io;
1908 #endif /* !defined(CONFIG_USER_ONLY) */
1910 /* physical memory access (slow version, mainly for debug) */
1911 #if defined(CONFIG_USER_ONLY)
1912 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1913 uint8_t *buf, int len, int is_write)
1915 int l, flags;
1916 target_ulong page;
1917 void * p;
1919 while (len > 0) {
1920 page = addr & TARGET_PAGE_MASK;
1921 l = (page + TARGET_PAGE_SIZE) - addr;
1922 if (l > len)
1923 l = len;
1924 flags = page_get_flags(page);
1925 if (!(flags & PAGE_VALID))
1926 return -1;
1927 if (is_write) {
1928 if (!(flags & PAGE_WRITE))
1929 return -1;
1930 /* XXX: this code should not depend on lock_user */
1931 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1932 return -1;
1933 memcpy(p, buf, l);
1934 unlock_user(p, addr, l);
1935 } else {
1936 if (!(flags & PAGE_READ))
1937 return -1;
1938 /* XXX: this code should not depend on lock_user */
1939 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1940 return -1;
1941 memcpy(buf, p, l);
1942 unlock_user(p, addr, 0);
1944 len -= l;
1945 buf += l;
1946 addr += l;
1948 return 0;
1951 #else
1953 static void invalidate_and_set_dirty(hwaddr addr,
1954 hwaddr length)
1956 if (cpu_physical_memory_is_clean(addr)) {
1957 /* invalidate code */
1958 tb_invalidate_phys_page_range(addr, addr + length, 0);
1959 /* set dirty bit */
1960 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1961 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1963 xen_modified_memory(addr, length);
1966 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1968 unsigned access_size_max = mr->ops->valid.max_access_size;
1970 /* Regions are assumed to support 1-4 byte accesses unless
1971 otherwise specified. */
1972 if (access_size_max == 0) {
1973 access_size_max = 4;
1976 /* Bound the maximum access by the alignment of the address. */
1977 if (!mr->ops->impl.unaligned) {
1978 unsigned align_size_max = addr & -addr;
1979 if (align_size_max != 0 && align_size_max < access_size_max) {
1980 access_size_max = align_size_max;
1984 /* Don't attempt accesses larger than the maximum. */
1985 if (l > access_size_max) {
1986 l = access_size_max;
1988 if (l & (l - 1)) {
1989 l = 1 << (qemu_fls(l) - 1);
1992 return l;
1995 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1996 int len, bool is_write)
1998 hwaddr l;
1999 uint8_t *ptr;
2000 uint64_t val;
2001 hwaddr addr1;
2002 MemoryRegion *mr;
2003 bool error = false;
2005 while (len > 0) {
2006 l = len;
2007 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2009 if (is_write) {
2010 if (!memory_access_is_direct(mr, is_write)) {
2011 l = memory_access_size(mr, l, addr1);
2012 /* XXX: could force current_cpu to NULL to avoid
2013 potential bugs */
2014 switch (l) {
2015 case 8:
2016 /* 64 bit write access */
2017 val = ldq_p(buf);
2018 error |= io_mem_write(mr, addr1, val, 8);
2019 break;
2020 case 4:
2021 /* 32 bit write access */
2022 val = ldl_p(buf);
2023 error |= io_mem_write(mr, addr1, val, 4);
2024 break;
2025 case 2:
2026 /* 16 bit write access */
2027 val = lduw_p(buf);
2028 error |= io_mem_write(mr, addr1, val, 2);
2029 break;
2030 case 1:
2031 /* 8 bit write access */
2032 val = ldub_p(buf);
2033 error |= io_mem_write(mr, addr1, val, 1);
2034 break;
2035 default:
2036 abort();
2038 } else {
2039 addr1 += memory_region_get_ram_addr(mr);
2040 /* RAM case */
2041 ptr = qemu_get_ram_ptr(addr1);
2042 memcpy(ptr, buf, l);
2043 invalidate_and_set_dirty(addr1, l);
2045 } else {
2046 if (!memory_access_is_direct(mr, is_write)) {
2047 /* I/O case */
2048 l = memory_access_size(mr, l, addr1);
2049 switch (l) {
2050 case 8:
2051 /* 64 bit read access */
2052 error |= io_mem_read(mr, addr1, &val, 8);
2053 stq_p(buf, val);
2054 break;
2055 case 4:
2056 /* 32 bit read access */
2057 error |= io_mem_read(mr, addr1, &val, 4);
2058 stl_p(buf, val);
2059 break;
2060 case 2:
2061 /* 16 bit read access */
2062 error |= io_mem_read(mr, addr1, &val, 2);
2063 stw_p(buf, val);
2064 break;
2065 case 1:
2066 /* 8 bit read access */
2067 error |= io_mem_read(mr, addr1, &val, 1);
2068 stb_p(buf, val);
2069 break;
2070 default:
2071 abort();
2073 } else {
2074 /* RAM case */
2075 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2076 memcpy(buf, ptr, l);
2079 len -= l;
2080 buf += l;
2081 addr += l;
2084 return error;
2087 bool address_space_write(AddressSpace *as, hwaddr addr,
2088 const uint8_t *buf, int len)
2090 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2093 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2095 return address_space_rw(as, addr, buf, len, false);
2099 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2100 int len, int is_write)
2102 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE = 1,    /* flush the host icache for the range */
};
2110 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2111 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2113 hwaddr l;
2114 uint8_t *ptr;
2115 hwaddr addr1;
2116 MemoryRegion *mr;
2118 while (len > 0) {
2119 l = len;
2120 mr = address_space_translate(as, addr, &addr1, &l, true);
2122 if (!(memory_region_is_ram(mr) ||
2123 memory_region_is_romd(mr))) {
2124 /* do nothing */
2125 } else {
2126 addr1 += memory_region_get_ram_addr(mr);
2127 /* ROM/RAM case */
2128 ptr = qemu_get_ram_ptr(addr1);
2129 switch (type) {
2130 case WRITE_DATA:
2131 memcpy(ptr, buf, l);
2132 invalidate_and_set_dirty(addr1, l);
2133 break;
2134 case FLUSH_CACHE:
2135 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2136 break;
2139 len -= l;
2140 buf += l;
2141 addr += l;
2145 /* used for ROM loading : can write in RAM and ROM */
2146 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2147 const uint8_t *buf, int len)
2149 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2152 void cpu_flush_icache_range(hwaddr start, int len)
2155 * This function should do the same thing as an icache flush that was
2156 * triggered from within the guest. For TCG we are always cache coherent,
2157 * so there is no need to flush anything. For KVM / Xen we need to flush
2158 * the host's instruction cache at least.
2160 if (tcg_enabled()) {
2161 return;
2164 cpu_physical_memory_write_rom_internal(&address_space_memory,
2165 start, NULL, len, FLUSH_CACHE);
2168 typedef struct {
2169 MemoryRegion *mr;
2170 void *buffer;
2171 hwaddr addr;
2172 hwaddr len;
2173 } BounceBuffer;
2175 static BounceBuffer bounce;
2177 typedef struct MapClient {
2178 void *opaque;
2179 void (*callback)(void *opaque);
2180 QLIST_ENTRY(MapClient) link;
2181 } MapClient;
2183 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2184 = QLIST_HEAD_INITIALIZER(map_client_list);
2186 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2188 MapClient *client = g_malloc(sizeof(*client));
2190 client->opaque = opaque;
2191 client->callback = callback;
2192 QLIST_INSERT_HEAD(&map_client_list, client, link);
2193 return client;
2196 static void cpu_unregister_map_client(void *_client)
2198 MapClient *client = (MapClient *)_client;
2200 QLIST_REMOVE(client, link);
2201 g_free(client);
2204 static void cpu_notify_map_clients(void)
2206 MapClient *client;
2208 while (!QLIST_EMPTY(&map_client_list)) {
2209 client = QLIST_FIRST(&map_client_list);
2210 client->callback(client->opaque);
2211 cpu_unregister_map_client(client);
2215 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2217 MemoryRegion *mr;
2218 hwaddr l, xlat;
2220 while (len > 0) {
2221 l = len;
2222 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2223 if (!memory_access_is_direct(mr, is_write)) {
2224 l = memory_access_size(mr, l, addr);
2225 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2226 return false;
2230 len -= l;
2231 addr += l;
2233 return true;
2236 /* Map a physical memory region into a host virtual address.
2237 * May map a subset of the requested range, given by and returned in *plen.
2238 * May return NULL if resources needed to perform the mapping are exhausted.
2239 * Use only for reads OR writes - not for read-modify-write operations.
2240 * Use cpu_register_map_client() to know when retrying the map operation is
2241 * likely to succeed.
2243 void *address_space_map(AddressSpace *as,
2244 hwaddr addr,
2245 hwaddr *plen,
2246 bool is_write)
2248 hwaddr len = *plen;
2249 hwaddr done = 0;
2250 hwaddr l, xlat, base;
2251 MemoryRegion *mr, *this_mr;
2252 ram_addr_t raddr;
2254 if (len == 0) {
2255 return NULL;
2258 l = len;
2259 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2260 if (!memory_access_is_direct(mr, is_write)) {
2261 if (bounce.buffer) {
2262 return NULL;
2264 /* Avoid unbounded allocations */
2265 l = MIN(l, TARGET_PAGE_SIZE);
2266 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2267 bounce.addr = addr;
2268 bounce.len = l;
2270 memory_region_ref(mr);
2271 bounce.mr = mr;
2272 if (!is_write) {
2273 address_space_read(as, addr, bounce.buffer, l);
2276 *plen = l;
2277 return bounce.buffer;
2280 base = xlat;
2281 raddr = memory_region_get_ram_addr(mr);
2283 for (;;) {
2284 len -= l;
2285 addr += l;
2286 done += l;
2287 if (len == 0) {
2288 break;
2291 l = len;
2292 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2293 if (this_mr != mr || xlat != base + done) {
2294 break;
2298 memory_region_ref(mr);
2299 *plen = done;
2300 return qemu_ram_ptr_length(raddr + base, plen);
2303 /* Unmaps a memory region previously mapped by address_space_map().
2304 * Will also mark the memory as dirty if is_write == 1. access_len gives
2305 * the amount of memory that was actually read or written by the caller.
2307 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2308 int is_write, hwaddr access_len)
2310 if (buffer != bounce.buffer) {
2311 MemoryRegion *mr;
2312 ram_addr_t addr1;
2314 mr = qemu_ram_addr_from_host(buffer, &addr1);
2315 assert(mr != NULL);
2316 if (is_write) {
2317 while (access_len) {
2318 unsigned l;
2319 l = TARGET_PAGE_SIZE;
2320 if (l > access_len)
2321 l = access_len;
2322 invalidate_and_set_dirty(addr1, l);
2323 addr1 += l;
2324 access_len -= l;
2327 if (xen_enabled()) {
2328 xen_invalidate_map_cache_entry(buffer);
2330 memory_region_unref(mr);
2331 return;
2333 if (is_write) {
2334 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2336 qemu_vfree(bounce.buffer);
2337 bounce.buffer = NULL;
2338 memory_region_unref(bounce.mr);
2339 cpu_notify_map_clients();
2342 void *cpu_physical_memory_map(hwaddr addr,
2343 hwaddr *plen,
2344 int is_write)
2346 return address_space_map(&address_space_memory, addr, plen, is_write);
2349 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2350 int is_write, hwaddr access_len)
2352 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2355 /* warning: addr must be aligned */
2356 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2357 enum device_endian endian)
2359 uint8_t *ptr;
2360 uint64_t val;
2361 MemoryRegion *mr;
2362 hwaddr l = 4;
2363 hwaddr addr1;
2365 mr = address_space_translate(as, addr, &addr1, &l, false);
2366 if (l < 4 || !memory_access_is_direct(mr, false)) {
2367 /* I/O case */
2368 io_mem_read(mr, addr1, &val, 4);
2369 #if defined(TARGET_WORDS_BIGENDIAN)
2370 if (endian == DEVICE_LITTLE_ENDIAN) {
2371 val = bswap32(val);
2373 #else
2374 if (endian == DEVICE_BIG_ENDIAN) {
2375 val = bswap32(val);
2377 #endif
2378 } else {
2379 /* RAM case */
2380 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2381 & TARGET_PAGE_MASK)
2382 + addr1);
2383 switch (endian) {
2384 case DEVICE_LITTLE_ENDIAN:
2385 val = ldl_le_p(ptr);
2386 break;
2387 case DEVICE_BIG_ENDIAN:
2388 val = ldl_be_p(ptr);
2389 break;
2390 default:
2391 val = ldl_p(ptr);
2392 break;
2395 return val;
2398 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2400 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2403 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2405 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2408 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2410 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2413 /* warning: addr must be aligned */
2414 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2415 enum device_endian endian)
2417 uint8_t *ptr;
2418 uint64_t val;
2419 MemoryRegion *mr;
2420 hwaddr l = 8;
2421 hwaddr addr1;
2423 mr = address_space_translate(as, addr, &addr1, &l,
2424 false);
2425 if (l < 8 || !memory_access_is_direct(mr, false)) {
2426 /* I/O case */
2427 io_mem_read(mr, addr1, &val, 8);
2428 #if defined(TARGET_WORDS_BIGENDIAN)
2429 if (endian == DEVICE_LITTLE_ENDIAN) {
2430 val = bswap64(val);
2432 #else
2433 if (endian == DEVICE_BIG_ENDIAN) {
2434 val = bswap64(val);
2436 #endif
2437 } else {
2438 /* RAM case */
2439 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2440 & TARGET_PAGE_MASK)
2441 + addr1);
2442 switch (endian) {
2443 case DEVICE_LITTLE_ENDIAN:
2444 val = ldq_le_p(ptr);
2445 break;
2446 case DEVICE_BIG_ENDIAN:
2447 val = ldq_be_p(ptr);
2448 break;
2449 default:
2450 val = ldq_p(ptr);
2451 break;
2454 return val;
2457 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2459 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2462 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2464 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2467 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2469 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2472 /* XXX: optimize */
2473 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2475 uint8_t val;
2476 address_space_rw(as, addr, &val, 1, 0);
2477 return val;
2480 /* warning: addr must be aligned */
2481 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2482 enum device_endian endian)
2484 uint8_t *ptr;
2485 uint64_t val;
2486 MemoryRegion *mr;
2487 hwaddr l = 2;
2488 hwaddr addr1;
2490 mr = address_space_translate(as, addr, &addr1, &l,
2491 false);
2492 if (l < 2 || !memory_access_is_direct(mr, false)) {
2493 /* I/O case */
2494 io_mem_read(mr, addr1, &val, 2);
2495 #if defined(TARGET_WORDS_BIGENDIAN)
2496 if (endian == DEVICE_LITTLE_ENDIAN) {
2497 val = bswap16(val);
2499 #else
2500 if (endian == DEVICE_BIG_ENDIAN) {
2501 val = bswap16(val);
2503 #endif
2504 } else {
2505 /* RAM case */
2506 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2507 & TARGET_PAGE_MASK)
2508 + addr1);
2509 switch (endian) {
2510 case DEVICE_LITTLE_ENDIAN:
2511 val = lduw_le_p(ptr);
2512 break;
2513 case DEVICE_BIG_ENDIAN:
2514 val = lduw_be_p(ptr);
2515 break;
2516 default:
2517 val = lduw_p(ptr);
2518 break;
2521 return val;
2524 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2526 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2529 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2531 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2534 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2536 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2539 /* warning: addr must be aligned. The ram page is not masked as dirty
2540 and the code inside is not invalidated. It is useful if the dirty
2541 bits are used to track modified PTEs */
2542 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2544 uint8_t *ptr;
2545 MemoryRegion *mr;
2546 hwaddr l = 4;
2547 hwaddr addr1;
2549 mr = address_space_translate(as, addr, &addr1, &l,
2550 true);
2551 if (l < 4 || !memory_access_is_direct(mr, true)) {
2552 io_mem_write(mr, addr1, val, 4);
2553 } else {
2554 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2555 ptr = qemu_get_ram_ptr(addr1);
2556 stl_p(ptr, val);
2558 if (unlikely(in_migration)) {
2559 if (cpu_physical_memory_is_clean(addr1)) {
2560 /* invalidate code */
2561 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2562 /* set dirty bit */
2563 cpu_physical_memory_set_dirty_flag(addr1,
2564 DIRTY_MEMORY_MIGRATION);
2565 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2571 /* warning: addr must be aligned */
2572 static inline void stl_phys_internal(AddressSpace *as,
2573 hwaddr addr, uint32_t val,
2574 enum device_endian endian)
2576 uint8_t *ptr;
2577 MemoryRegion *mr;
2578 hwaddr l = 4;
2579 hwaddr addr1;
2581 mr = address_space_translate(as, addr, &addr1, &l,
2582 true);
2583 if (l < 4 || !memory_access_is_direct(mr, true)) {
2584 #if defined(TARGET_WORDS_BIGENDIAN)
2585 if (endian == DEVICE_LITTLE_ENDIAN) {
2586 val = bswap32(val);
2588 #else
2589 if (endian == DEVICE_BIG_ENDIAN) {
2590 val = bswap32(val);
2592 #endif
2593 io_mem_write(mr, addr1, val, 4);
2594 } else {
2595 /* RAM case */
2596 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2597 ptr = qemu_get_ram_ptr(addr1);
2598 switch (endian) {
2599 case DEVICE_LITTLE_ENDIAN:
2600 stl_le_p(ptr, val);
2601 break;
2602 case DEVICE_BIG_ENDIAN:
2603 stl_be_p(ptr, val);
2604 break;
2605 default:
2606 stl_p(ptr, val);
2607 break;
2609 invalidate_and_set_dirty(addr1, 4);
2613 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2615 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2618 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2620 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2623 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2625 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2628 /* XXX: optimize */
2629 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2631 uint8_t v = val;
2632 address_space_rw(as, addr, &v, 1, 1);
2635 /* warning: addr must be aligned */
2636 static inline void stw_phys_internal(AddressSpace *as,
2637 hwaddr addr, uint32_t val,
2638 enum device_endian endian)
2640 uint8_t *ptr;
2641 MemoryRegion *mr;
2642 hwaddr l = 2;
2643 hwaddr addr1;
2645 mr = address_space_translate(as, addr, &addr1, &l, true);
2646 if (l < 2 || !memory_access_is_direct(mr, true)) {
2647 #if defined(TARGET_WORDS_BIGENDIAN)
2648 if (endian == DEVICE_LITTLE_ENDIAN) {
2649 val = bswap16(val);
2651 #else
2652 if (endian == DEVICE_BIG_ENDIAN) {
2653 val = bswap16(val);
2655 #endif
2656 io_mem_write(mr, addr1, val, 2);
2657 } else {
2658 /* RAM case */
2659 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2660 ptr = qemu_get_ram_ptr(addr1);
2661 switch (endian) {
2662 case DEVICE_LITTLE_ENDIAN:
2663 stw_le_p(ptr, val);
2664 break;
2665 case DEVICE_BIG_ENDIAN:
2666 stw_be_p(ptr, val);
2667 break;
2668 default:
2669 stw_p(ptr, val);
2670 break;
2672 invalidate_and_set_dirty(addr1, 2);
2676 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2678 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2681 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2683 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2686 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2688 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2691 /* XXX: optimize */
2692 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2694 val = tswap64(val);
2695 address_space_rw(as, addr, (void *) &val, 8, 1);
2698 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2700 val = cpu_to_le64(val);
2701 address_space_rw(as, addr, (void *) &val, 8, 1);
2704 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2706 val = cpu_to_be64(val);
2707 address_space_rw(as, addr, (void *) &val, 8, 1);
2710 /* virtual memory access for debug (includes writing to ROM) */
2711 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2712 uint8_t *buf, int len, int is_write)
2714 int l;
2715 hwaddr phys_addr;
2716 target_ulong page;
2718 while (len > 0) {
2719 page = addr & TARGET_PAGE_MASK;
2720 phys_addr = cpu_get_phys_page_debug(cpu, page);
2721 /* if no physical page mapped, return an error */
2722 if (phys_addr == -1)
2723 return -1;
2724 l = (page + TARGET_PAGE_SIZE) - addr;
2725 if (l > len)
2726 l = len;
2727 phys_addr += (addr & ~TARGET_PAGE_MASK);
2728 if (is_write) {
2729 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2730 } else {
2731 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2733 len -= l;
2734 buf += l;
2735 addr += l;
2737 return 0;
2739 #endif
2741 #if !defined(CONFIG_USER_ONLY)
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at compile time,
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
2757 #endif
2759 #ifndef CONFIG_USER_ONLY
2760 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2762 MemoryRegion*mr;
2763 hwaddr l = 1;
2765 mr = address_space_translate(&address_space_memory,
2766 phys_addr, &phys_addr, &l, false);
2768 return !(memory_region_is_ram(mr) ||
2769 memory_region_is_romd(mr));
2772 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2774 RAMBlock *block;
2776 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2777 func(block->host, block->offset, block->length, opaque);
2780 #endif