block/cloop: refuse images with bogus offsets (CVE-2014-0144)
[qemu.git] / exec.c
blob91513c6c43d47a84157a46ad33042e60911bacaf
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
55 #include "qemu/range.h"
57 //#define DEBUG_SUBPAGE
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
73 #endif
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
84 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page radix tree, packed into 32 bits. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next one (in units of L2_SIZE);
     * zero marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
100 #define P_L2_BITS 9
101 #define P_L2_SIZE (1 << P_L2_BITS)
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
105 typedef PhysPageEntry Node[P_L2_SIZE];
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
110 unsigned nodes_nb;
111 unsigned nodes_nb_alloc;
112 Node *nodes;
113 MemoryRegionSection *sections;
114 } PhysPageMap;
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
120 PhysPageEntry phys_map;
121 PhysPageMap map;
122 AddressSpace *as;
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
127 MemoryRegion iomem;
128 AddressSpace *as;
129 hwaddr base;
130 uint16_t sub_section[TARGET_PAGE_SIZE];
131 } subpage_t;
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
142 static MemoryRegion io_mem_watch;
143 #endif
145 #if !defined(CONFIG_USER_ONLY)
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
158 unsigned i;
159 uint32_t ret;
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
168 return ret;
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
188 } else {
189 p = map->nodes[lp->ptr];
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
202 ++lp;
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
248 assert(valid_ptr < P_L2_SIZE);
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271 DECLARE_BITMAP(compacted, nodes_nb);
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
312 MemoryRegionSection *section;
313 subpage_t *subpage;
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
320 return section;
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
327 MemoryRegionSection *section;
328 Int128 diff;
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
351 return false;
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
367 if (!mr->iommu_ops) {
368 break;
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
380 as = iotlb.target_as;
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
388 *plen = len;
389 *xlat = addr;
390 return mr;
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
400 assert(!section->mr->iommu_ops);
401 return section;
403 #endif
405 void cpu_exec_init_all(void)
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
414 #if !defined(CONFIG_USER_ONLY)
416 static int cpu_common_post_load(void *opaque, int version_id)
418 CPUState *cpu = opaque;
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu, 1);
425 return 0;
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
430 .version_id = 1,
431 .minimum_version_id = 1,
432 .minimum_version_id_old = 1,
433 .post_load = cpu_common_post_load,
434 .fields = (VMStateField []) {
435 VMSTATE_UINT32(halted, CPUState),
436 VMSTATE_UINT32(interrupt_request, CPUState),
437 VMSTATE_END_OF_LIST()
441 #endif
443 CPUState *qemu_get_cpu(int index)
445 CPUState *cpu;
447 CPU_FOREACH(cpu) {
448 if (cpu->cpu_index == index) {
449 return cpu;
453 return NULL;
456 #if !defined(CONFIG_USER_ONLY)
457 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
459 /* We only support one address space per cpu at the moment. */
460 assert(cpu->as == as);
462 if (cpu->tcg_as_listener) {
463 memory_listener_unregister(cpu->tcg_as_listener);
464 } else {
465 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
467 cpu->tcg_as_listener->commit = tcg_commit;
468 memory_listener_register(cpu->tcg_as_listener, as);
470 #endif
472 void cpu_exec_init(CPUArchState *env)
474 CPUState *cpu = ENV_GET_CPU(env);
475 CPUClass *cc = CPU_GET_CLASS(cpu);
476 CPUState *some_cpu;
477 int cpu_index;
479 #if defined(CONFIG_USER_ONLY)
480 cpu_list_lock();
481 #endif
482 cpu_index = 0;
483 CPU_FOREACH(some_cpu) {
484 cpu_index++;
486 cpu->cpu_index = cpu_index;
487 cpu->numa_node = 0;
488 QTAILQ_INIT(&cpu->breakpoints);
489 QTAILQ_INIT(&cpu->watchpoints);
490 #ifndef CONFIG_USER_ONLY
491 cpu->as = &address_space_memory;
492 cpu->thread_id = qemu_get_thread_id();
493 #endif
494 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
495 #if defined(CONFIG_USER_ONLY)
496 cpu_list_unlock();
497 #endif
498 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
499 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
501 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
502 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
503 cpu_save, cpu_load, env);
504 assert(cc->vmsd == NULL);
505 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
506 #endif
507 if (cc->vmsd != NULL) {
508 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
512 #if defined(TARGET_HAS_ICE)
513 #if defined(CONFIG_USER_ONLY)
514 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
516 tb_invalidate_phys_page_range(pc, pc + 1, 0);
518 #else
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
521 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
522 if (phys != -1) {
523 tb_invalidate_phys_addr(cpu->as,
524 phys | (pc & ~TARGET_PAGE_MASK));
527 #endif
528 #endif /* TARGET_HAS_ICE */
530 #if defined(CONFIG_USER_ONLY)
531 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
536 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
537 int flags, CPUWatchpoint **watchpoint)
539 return -ENOSYS;
541 #else
542 /* Add a watchpoint. */
543 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
544 int flags, CPUWatchpoint **watchpoint)
546 vaddr len_mask = ~(len - 1);
547 CPUWatchpoint *wp;
549 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
550 if ((len & (len - 1)) || (addr & ~len_mask) ||
551 len == 0 || len > TARGET_PAGE_SIZE) {
552 error_report("tried to set invalid watchpoint at %"
553 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
554 return -EINVAL;
556 wp = g_malloc(sizeof(*wp));
558 wp->vaddr = addr;
559 wp->len_mask = len_mask;
560 wp->flags = flags;
562 /* keep all GDB-injected watchpoints in front */
563 if (flags & BP_GDB) {
564 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
565 } else {
566 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
569 tlb_flush_page(cpu, addr);
571 if (watchpoint)
572 *watchpoint = wp;
573 return 0;
576 /* Remove a specific watchpoint. */
577 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
578 int flags)
580 vaddr len_mask = ~(len - 1);
581 CPUWatchpoint *wp;
583 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
584 if (addr == wp->vaddr && len_mask == wp->len_mask
585 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
586 cpu_watchpoint_remove_by_ref(cpu, wp);
587 return 0;
590 return -ENOENT;
593 /* Remove a specific watchpoint by reference. */
594 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
596 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
598 tlb_flush_page(cpu, watchpoint->vaddr);
600 g_free(watchpoint);
603 /* Remove all matching watchpoints. */
604 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
606 CPUWatchpoint *wp, *next;
608 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
609 if (wp->flags & mask) {
610 cpu_watchpoint_remove_by_ref(cpu, wp);
614 #endif
616 /* Add a breakpoint. */
617 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
618 CPUBreakpoint **breakpoint)
620 #if defined(TARGET_HAS_ICE)
621 CPUBreakpoint *bp;
623 bp = g_malloc(sizeof(*bp));
625 bp->pc = pc;
626 bp->flags = flags;
628 /* keep all GDB-injected breakpoints in front */
629 if (flags & BP_GDB) {
630 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
631 } else {
632 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
635 breakpoint_invalidate(cpu, pc);
637 if (breakpoint) {
638 *breakpoint = bp;
640 return 0;
641 #else
642 return -ENOSYS;
643 #endif
646 /* Remove a specific breakpoint. */
647 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
649 #if defined(TARGET_HAS_ICE)
650 CPUBreakpoint *bp;
652 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
653 if (bp->pc == pc && bp->flags == flags) {
654 cpu_breakpoint_remove_by_ref(cpu, bp);
655 return 0;
658 return -ENOENT;
659 #else
660 return -ENOSYS;
661 #endif
664 /* Remove a specific breakpoint by reference. */
665 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
667 #if defined(TARGET_HAS_ICE)
668 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
670 breakpoint_invalidate(cpu, breakpoint->pc);
672 g_free(breakpoint);
673 #endif
676 /* Remove all matching breakpoints. */
677 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
679 #if defined(TARGET_HAS_ICE)
680 CPUBreakpoint *bp, *next;
682 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
683 if (bp->flags & mask) {
684 cpu_breakpoint_remove_by_ref(cpu, bp);
687 #endif
690 /* enable or disable single step mode. EXCP_DEBUG is returned by the
691 CPU loop after each instruction */
692 void cpu_single_step(CPUState *cpu, int enabled)
694 #if defined(TARGET_HAS_ICE)
695 if (cpu->singlestep_enabled != enabled) {
696 cpu->singlestep_enabled = enabled;
697 if (kvm_enabled()) {
698 kvm_update_guest_debug(cpu, 0);
699 } else {
700 /* must flush all the translated code to avoid inconsistencies */
701 /* XXX: only flush what is necessary */
702 CPUArchState *env = cpu->env_ptr;
703 tb_flush(env);
706 #endif
709 void cpu_abort(CPUState *cpu, const char *fmt, ...)
711 va_list ap;
712 va_list ap2;
714 va_start(ap, fmt);
715 va_copy(ap2, ap);
716 fprintf(stderr, "qemu: fatal: ");
717 vfprintf(stderr, fmt, ap);
718 fprintf(stderr, "\n");
719 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
720 if (qemu_log_enabled()) {
721 qemu_log("qemu: fatal: ");
722 qemu_log_vprintf(fmt, ap2);
723 qemu_log("\n");
724 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
725 qemu_log_flush();
726 qemu_log_close();
728 va_end(ap2);
729 va_end(ap);
730 #if defined(CONFIG_USER_ONLY)
732 struct sigaction act;
733 sigfillset(&act.sa_mask);
734 act.sa_handler = SIG_DFL;
735 sigaction(SIGABRT, &act, NULL);
737 #endif
738 abort();
741 #if !defined(CONFIG_USER_ONLY)
742 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
744 RAMBlock *block;
746 /* The list is protected by the iothread lock here. */
747 block = ram_list.mru_block;
748 if (block && addr - block->offset < block->length) {
749 goto found;
751 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
752 if (addr - block->offset < block->length) {
753 goto found;
757 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
758 abort();
760 found:
761 ram_list.mru_block = block;
762 return block;
765 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
767 ram_addr_t start1;
768 RAMBlock *block;
769 ram_addr_t end;
771 end = TARGET_PAGE_ALIGN(start + length);
772 start &= TARGET_PAGE_MASK;
774 block = qemu_get_ram_block(start);
775 assert(block == qemu_get_ram_block(end - 1));
776 start1 = (uintptr_t)block->host + (start - block->offset);
777 cpu_tlb_reset_dirty_all(start1, length);
780 /* Note: start and end must be within the same ram block. */
781 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
782 unsigned client)
784 if (length == 0)
785 return;
786 cpu_physical_memory_clear_dirty_range(start, length, client);
788 if (tcg_enabled()) {
789 tlb_reset_dirty_range_all(start, length);
793 static void cpu_physical_memory_set_dirty_tracking(bool enable)
795 in_migration = enable;
798 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
799 MemoryRegionSection *section,
800 target_ulong vaddr,
801 hwaddr paddr, hwaddr xlat,
802 int prot,
803 target_ulong *address)
805 hwaddr iotlb;
806 CPUWatchpoint *wp;
808 if (memory_region_is_ram(section->mr)) {
809 /* Normal RAM. */
810 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
811 + xlat;
812 if (!section->readonly) {
813 iotlb |= PHYS_SECTION_NOTDIRTY;
814 } else {
815 iotlb |= PHYS_SECTION_ROM;
817 } else {
818 iotlb = section - section->address_space->dispatch->map.sections;
819 iotlb += xlat;
822 /* Make accesses to pages with watchpoints go via the
823 watchpoint trap routines. */
824 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
825 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
826 /* Avoid trapping reads of pages with a write breakpoint. */
827 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
828 iotlb = PHYS_SECTION_WATCH + paddr;
829 *address |= TLB_MMIO;
830 break;
835 return iotlb;
837 #endif /* defined(CONFIG_USER_ONLY) */
839 #if !defined(CONFIG_USER_ONLY)
841 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
842 uint16_t section);
843 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
845 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
848 * Set a custom physical guest memory alloator.
849 * Accelerators with unusual needs may need this. Hopefully, we can
850 * get rid of it eventually.
852 void phys_mem_set_alloc(void *(*alloc)(size_t))
854 phys_mem_alloc = alloc;
857 static uint16_t phys_section_add(PhysPageMap *map,
858 MemoryRegionSection *section)
860 /* The physical section number is ORed with a page-aligned
861 * pointer to produce the iotlb entries. Thus it should
862 * never overflow into the page-aligned value.
864 assert(map->sections_nb < TARGET_PAGE_SIZE);
866 if (map->sections_nb == map->sections_nb_alloc) {
867 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
868 map->sections = g_renew(MemoryRegionSection, map->sections,
869 map->sections_nb_alloc);
871 map->sections[map->sections_nb] = *section;
872 memory_region_ref(section->mr);
873 return map->sections_nb++;
876 static void phys_section_destroy(MemoryRegion *mr)
878 memory_region_unref(mr);
880 if (mr->subpage) {
881 subpage_t *subpage = container_of(mr, subpage_t, iomem);
882 memory_region_destroy(&subpage->iomem);
883 g_free(subpage);
887 static void phys_sections_free(PhysPageMap *map)
889 while (map->sections_nb > 0) {
890 MemoryRegionSection *section = &map->sections[--map->sections_nb];
891 phys_section_destroy(section->mr);
893 g_free(map->sections);
894 g_free(map->nodes);
897 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
899 subpage_t *subpage;
900 hwaddr base = section->offset_within_address_space
901 & TARGET_PAGE_MASK;
902 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
903 d->map.nodes, d->map.sections);
904 MemoryRegionSection subsection = {
905 .offset_within_address_space = base,
906 .size = int128_make64(TARGET_PAGE_SIZE),
908 hwaddr start, end;
910 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
912 if (!(existing->mr->subpage)) {
913 subpage = subpage_init(d->as, base);
914 subsection.address_space = d->as;
915 subsection.mr = &subpage->iomem;
916 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
917 phys_section_add(&d->map, &subsection));
918 } else {
919 subpage = container_of(existing->mr, subpage_t, iomem);
921 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
922 end = start + int128_get64(section->size) - 1;
923 subpage_register(subpage, start, end,
924 phys_section_add(&d->map, section));
928 static void register_multipage(AddressSpaceDispatch *d,
929 MemoryRegionSection *section)
931 hwaddr start_addr = section->offset_within_address_space;
932 uint16_t section_index = phys_section_add(&d->map, section);
933 uint64_t num_pages = int128_get64(int128_rshift(section->size,
934 TARGET_PAGE_BITS));
936 assert(num_pages);
937 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
940 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
942 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
943 AddressSpaceDispatch *d = as->next_dispatch;
944 MemoryRegionSection now = *section, remain = *section;
945 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
947 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
948 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
949 - now.offset_within_address_space;
951 now.size = int128_min(int128_make64(left), now.size);
952 register_subpage(d, &now);
953 } else {
954 now.size = int128_zero();
956 while (int128_ne(remain.size, now.size)) {
957 remain.size = int128_sub(remain.size, now.size);
958 remain.offset_within_address_space += int128_get64(now.size);
959 remain.offset_within_region += int128_get64(now.size);
960 now = remain;
961 if (int128_lt(remain.size, page_size)) {
962 register_subpage(d, &now);
963 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
964 now.size = page_size;
965 register_subpage(d, &now);
966 } else {
967 now.size = int128_and(now.size, int128_neg(page_size));
968 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not in use. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
979 void qemu_mutex_lock_ramlist(void)
981 qemu_mutex_lock(&ram_list.mutex);
984 void qemu_mutex_unlock_ramlist(void)
986 qemu_mutex_unlock(&ram_list.mutex);
989 #ifdef __linux__
991 #include <sys/vfs.h>
993 #define HUGETLBFS_MAGIC 0x958458f6
995 static long gethugepagesize(const char *path)
997 struct statfs fs;
998 int ret;
1000 do {
1001 ret = statfs(path, &fs);
1002 } while (ret != 0 && errno == EINTR);
1004 if (ret != 0) {
1005 perror(path);
1006 return 0;
1009 if (fs.f_type != HUGETLBFS_MAGIC)
1010 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1012 return fs.f_bsize;
1015 static sigjmp_buf sigjump;
1017 static void sigbus_handler(int signal)
1019 siglongjmp(sigjump, 1);
1022 static void *file_ram_alloc(RAMBlock *block,
1023 ram_addr_t memory,
1024 const char *path)
1026 char *filename;
1027 char *sanitized_name;
1028 char *c;
1029 void *area;
1030 int fd;
1031 unsigned long hpagesize;
1033 hpagesize = gethugepagesize(path);
1034 if (!hpagesize) {
1035 goto error;
1038 if (memory < hpagesize) {
1039 return NULL;
1042 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1043 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1044 goto error;
1047 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1048 sanitized_name = g_strdup(block->mr->name);
1049 for (c = sanitized_name; *c != '\0'; c++) {
1050 if (*c == '/')
1051 *c = '_';
1054 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1055 sanitized_name);
1056 g_free(sanitized_name);
1058 fd = mkstemp(filename);
1059 if (fd < 0) {
1060 perror("unable to create backing store for hugepages");
1061 g_free(filename);
1062 goto error;
1064 unlink(filename);
1065 g_free(filename);
1067 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1070 * ftruncate is not supported by hugetlbfs in older
1071 * hosts, so don't bother bailing out on errors.
1072 * If anything goes wrong with it under other filesystems,
1073 * mmap will fail.
1075 if (ftruncate(fd, memory))
1076 perror("ftruncate");
1078 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1079 if (area == MAP_FAILED) {
1080 perror("file_ram_alloc: can't mmap RAM pages");
1081 close(fd);
1082 goto error;
1085 if (mem_prealloc) {
1086 int ret, i;
1087 struct sigaction act, oldact;
1088 sigset_t set, oldset;
1090 memset(&act, 0, sizeof(act));
1091 act.sa_handler = &sigbus_handler;
1092 act.sa_flags = 0;
1094 ret = sigaction(SIGBUS, &act, &oldact);
1095 if (ret) {
1096 perror("file_ram_alloc: failed to install signal handler");
1097 exit(1);
1100 /* unblock SIGBUS */
1101 sigemptyset(&set);
1102 sigaddset(&set, SIGBUS);
1103 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1105 if (sigsetjmp(sigjump, 1)) {
1106 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1107 exit(1);
1110 /* MAP_POPULATE silently ignores failures */
1111 for (i = 0; i < (memory/hpagesize); i++) {
1112 memset(area + (hpagesize*i), 0, 1);
1115 ret = sigaction(SIGBUS, &oldact, NULL);
1116 if (ret) {
1117 perror("file_ram_alloc: failed to reinstall signal handler");
1118 exit(1);
1121 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1124 block->fd = fd;
1125 return area;
1127 error:
1128 if (mem_prealloc) {
1129 exit(1);
1131 return NULL;
1133 #else
1134 static void *file_ram_alloc(RAMBlock *block,
1135 ram_addr_t memory,
1136 const char *path)
1138 fprintf(stderr, "-mem-path not supported on this host\n");
1139 exit(1);
1141 #endif
1143 static ram_addr_t find_ram_offset(ram_addr_t size)
1145 RAMBlock *block, *next_block;
1146 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1148 assert(size != 0); /* it would hand out same offset multiple times */
1150 if (QTAILQ_EMPTY(&ram_list.blocks))
1151 return 0;
1153 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1154 ram_addr_t end, next = RAM_ADDR_MAX;
1156 end = block->offset + block->length;
1158 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1159 if (next_block->offset >= end) {
1160 next = MIN(next, next_block->offset);
1163 if (next - end >= size && next - end < mingap) {
1164 offset = end;
1165 mingap = next - end;
1169 if (offset == RAM_ADDR_MAX) {
1170 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1171 (uint64_t)size);
1172 abort();
1175 return offset;
1178 ram_addr_t last_ram_offset(void)
1180 RAMBlock *block;
1181 ram_addr_t last = 0;
1183 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1184 last = MAX(last, block->offset + block->length);
1186 return last;
1189 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1191 int ret;
1193 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1194 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1195 "dump-guest-core", true)) {
1196 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1197 if (ret) {
1198 perror("qemu_madvise");
1199 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1200 "but dump_guest_core=off specified\n");
1205 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1207 RAMBlock *new_block, *block;
1209 new_block = NULL;
1210 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1211 if (block->offset == addr) {
1212 new_block = block;
1213 break;
1216 assert(new_block);
1217 assert(!new_block->idstr[0]);
1219 if (dev) {
1220 char *id = qdev_get_dev_path(dev);
1221 if (id) {
1222 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1223 g_free(id);
1226 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1228 /* This assumes the iothread lock is taken here too. */
1229 qemu_mutex_lock_ramlist();
1230 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1231 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1232 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1233 new_block->idstr);
1234 abort();
1237 qemu_mutex_unlock_ramlist();
1240 static int memory_try_enable_merging(void *addr, size_t len)
1242 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1243 /* disabled by the user */
1244 return 0;
1247 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1250 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1251 MemoryRegion *mr)
1253 RAMBlock *block, *new_block;
1254 ram_addr_t old_ram_size, new_ram_size;
1256 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1258 size = TARGET_PAGE_ALIGN(size);
1259 new_block = g_malloc0(sizeof(*new_block));
1260 new_block->fd = -1;
1262 /* This assumes the iothread lock is taken here too. */
1263 qemu_mutex_lock_ramlist();
1264 new_block->mr = mr;
1265 new_block->offset = find_ram_offset(size);
1266 if (host) {
1267 new_block->host = host;
1268 new_block->flags |= RAM_PREALLOC_MASK;
1269 } else if (xen_enabled()) {
1270 if (mem_path) {
1271 fprintf(stderr, "-mem-path not supported with Xen\n");
1272 exit(1);
1274 xen_ram_alloc(new_block->offset, size, mr);
1275 } else {
1276 if (mem_path) {
1277 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1279 * file_ram_alloc() needs to allocate just like
1280 * phys_mem_alloc, but we haven't bothered to provide
1281 * a hook there.
1283 fprintf(stderr,
1284 "-mem-path not supported with this accelerator\n");
1285 exit(1);
1287 new_block->host = file_ram_alloc(new_block, size, mem_path);
1289 if (!new_block->host) {
1290 new_block->host = phys_mem_alloc(size);
1291 if (!new_block->host) {
1292 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1293 new_block->mr->name, strerror(errno));
1294 exit(1);
1296 memory_try_enable_merging(new_block->host, size);
1299 new_block->length = size;
1301 /* Keep the list sorted from biggest to smallest block. */
1302 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1303 if (block->length < new_block->length) {
1304 break;
1307 if (block) {
1308 QTAILQ_INSERT_BEFORE(block, new_block, next);
1309 } else {
1310 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1312 ram_list.mru_block = NULL;
1314 ram_list.version++;
1315 qemu_mutex_unlock_ramlist();
1317 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1319 if (new_ram_size > old_ram_size) {
1320 int i;
1321 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1322 ram_list.dirty_memory[i] =
1323 bitmap_zero_extend(ram_list.dirty_memory[i],
1324 old_ram_size, new_ram_size);
1327 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1329 qemu_ram_setup_dump(new_block->host, size);
1330 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1331 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1333 if (kvm_enabled())
1334 kvm_setup_guest_memory(new_block->host, size);
1336 return new_block->offset;
1339 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1341 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1344 void qemu_ram_free_from_ptr(ram_addr_t addr)
1346 RAMBlock *block;
1348 /* This assumes the iothread lock is taken here too. */
1349 qemu_mutex_lock_ramlist();
1350 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1351 if (addr == block->offset) {
1352 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1353 ram_list.mru_block = NULL;
1354 ram_list.version++;
1355 g_free(block);
1356 break;
1359 qemu_mutex_unlock_ramlist();
1362 void qemu_ram_free(ram_addr_t addr)
1364 RAMBlock *block;
1366 /* This assumes the iothread lock is taken here too. */
1367 qemu_mutex_lock_ramlist();
1368 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1369 if (addr == block->offset) {
1370 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1371 ram_list.mru_block = NULL;
1372 ram_list.version++;
1373 if (block->flags & RAM_PREALLOC_MASK) {
1375 } else if (xen_enabled()) {
1376 xen_invalidate_map_cache_entry(block->host);
1377 #ifndef _WIN32
1378 } else if (block->fd >= 0) {
1379 munmap(block->host, block->length);
1380 close(block->fd);
1381 #endif
1382 } else {
1383 qemu_anon_ram_free(block->host, block->length);
1385 g_free(block);
1386 break;
1389 qemu_mutex_unlock_ramlist();
1393 #ifndef _WIN32
1394 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1396 RAMBlock *block;
1397 ram_addr_t offset;
1398 int flags;
1399 void *area, *vaddr;
1401 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1402 offset = addr - block->offset;
1403 if (offset < block->length) {
1404 vaddr = block->host + offset;
1405 if (block->flags & RAM_PREALLOC_MASK) {
1407 } else if (xen_enabled()) {
1408 abort();
1409 } else {
1410 flags = MAP_FIXED;
1411 munmap(vaddr, length);
1412 if (block->fd >= 0) {
1413 #ifdef MAP_POPULATE
1414 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1415 MAP_PRIVATE;
1416 #else
1417 flags |= MAP_PRIVATE;
1418 #endif
1419 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1420 flags, block->fd, offset);
1421 } else {
1423 * Remap needs to match alloc. Accelerators that
1424 * set phys_mem_alloc never remap. If they did,
1425 * we'd need a remap hook here.
1427 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1429 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1430 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1431 flags, -1, 0);
1433 if (area != vaddr) {
1434 fprintf(stderr, "Could not remap addr: "
1435 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1436 length, addr);
1437 exit(1);
1439 memory_try_enable_merging(vaddr, length);
1440 qemu_ram_setup_dump(vaddr, length);
1442 return;
1446 #endif /* !_WIN32 */
1448 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1449 With the exception of the softmmu code in this file, this should
1450 only be used for local memory (e.g. video ram) that the device owns,
1451 and knows it isn't going to access beyond the end of the block.
1453 It should not be used for general purpose DMA.
1454 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1456 void *qemu_get_ram_ptr(ram_addr_t addr)
1458 RAMBlock *block = qemu_get_ram_block(addr);
1460 if (xen_enabled()) {
1461 /* We need to check if the requested address is in the RAM
1462 * because we don't want to map the entire memory in QEMU.
1463 * In that case just map until the end of the page.
1465 if (block->offset == 0) {
1466 return xen_map_cache(addr, 0, 0);
1467 } else if (block->host == NULL) {
1468 block->host =
1469 xen_map_cache(block->offset, block->length, 1);
1472 return block->host + (addr - block->offset);
1475 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1476 * but takes a size argument */
1477 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1479 if (*size == 0) {
1480 return NULL;
1482 if (xen_enabled()) {
1483 return xen_map_cache(addr, *size, 1);
1484 } else {
1485 RAMBlock *block;
1487 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1488 if (addr - block->offset < block->length) {
1489 if (addr - block->offset + *size > block->length)
1490 *size = block->length - addr + block->offset;
1491 return block->host + (addr - block->offset);
1495 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1496 abort();
1500 /* Some of the softmmu routines need to translate from a host pointer
1501 (typically a TLB entry) back to a ram offset. */
1502 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1504 RAMBlock *block;
1505 uint8_t *host = ptr;
1507 if (xen_enabled()) {
1508 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1509 return qemu_get_ram_block(*ram_addr)->mr;
1512 block = ram_list.mru_block;
1513 if (block && block->host && host - block->host < block->length) {
1514 goto found;
1517 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1518 /* This case append when the block is not mapped. */
1519 if (block->host == NULL) {
1520 continue;
1522 if (host - block->host < block->length) {
1523 goto found;
1527 return NULL;
1529 found:
1530 *ram_addr = block->offset + (host - block->host);
1531 return block->mr;
1534 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1535 uint64_t val, unsigned size)
1537 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1538 tb_invalidate_phys_page_fast(ram_addr, size);
1540 switch (size) {
1541 case 1:
1542 stb_p(qemu_get_ram_ptr(ram_addr), val);
1543 break;
1544 case 2:
1545 stw_p(qemu_get_ram_ptr(ram_addr), val);
1546 break;
1547 case 4:
1548 stl_p(qemu_get_ram_ptr(ram_addr), val);
1549 break;
1550 default:
1551 abort();
1553 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1554 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1555 /* we remove the notdirty callback only if the code has been
1556 flushed */
1557 if (!cpu_physical_memory_is_clean(ram_addr)) {
1558 CPUArchState *env = current_cpu->env_ptr;
1559 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1563 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1564 unsigned size, bool is_write)
1566 return is_write;
1569 static const MemoryRegionOps notdirty_mem_ops = {
1570 .write = notdirty_mem_write,
1571 .valid.accepts = notdirty_mem_accepts,
1572 .endianness = DEVICE_NATIVE_ENDIAN,
1575 /* Generate a debug exception if a watchpoint has been hit. */
1576 static void check_watchpoint(int offset, int len_mask, int flags)
1578 CPUState *cpu = current_cpu;
1579 CPUArchState *env = cpu->env_ptr;
1580 target_ulong pc, cs_base;
1581 target_ulong vaddr;
1582 CPUWatchpoint *wp;
1583 int cpu_flags;
1585 if (cpu->watchpoint_hit) {
1586 /* We re-entered the check after replacing the TB. Now raise
1587 * the debug interrupt so that is will trigger after the
1588 * current instruction. */
1589 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1590 return;
1592 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1593 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1594 if ((vaddr == (wp->vaddr & len_mask) ||
1595 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1596 wp->flags |= BP_WATCHPOINT_HIT;
1597 if (!cpu->watchpoint_hit) {
1598 cpu->watchpoint_hit = wp;
1599 tb_check_watchpoint(cpu);
1600 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1601 cpu->exception_index = EXCP_DEBUG;
1602 cpu_loop_exit(cpu);
1603 } else {
1604 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1605 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1606 cpu_resume_from_signal(cpu, NULL);
1609 } else {
1610 wp->flags &= ~BP_WATCHPOINT_HIT;
1615 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1616 so these check for a hit then pass through to the normal out-of-line
1617 phys routines. */
1618 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1619 unsigned size)
1621 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1622 switch (size) {
1623 case 1: return ldub_phys(&address_space_memory, addr);
1624 case 2: return lduw_phys(&address_space_memory, addr);
1625 case 4: return ldl_phys(&address_space_memory, addr);
1626 default: abort();
1630 static void watch_mem_write(void *opaque, hwaddr addr,
1631 uint64_t val, unsigned size)
1633 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1634 switch (size) {
1635 case 1:
1636 stb_phys(&address_space_memory, addr, val);
1637 break;
1638 case 2:
1639 stw_phys(&address_space_memory, addr, val);
1640 break;
1641 case 4:
1642 stl_phys(&address_space_memory, addr, val);
1643 break;
1644 default: abort();
1648 static const MemoryRegionOps watch_mem_ops = {
1649 .read = watch_mem_read,
1650 .write = watch_mem_write,
1651 .endianness = DEVICE_NATIVE_ENDIAN,
1654 static uint64_t subpage_read(void *opaque, hwaddr addr,
1655 unsigned len)
1657 subpage_t *subpage = opaque;
1658 uint8_t buf[4];
1660 #if defined(DEBUG_SUBPAGE)
1661 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1662 subpage, len, addr);
1663 #endif
1664 address_space_read(subpage->as, addr + subpage->base, buf, len);
1665 switch (len) {
1666 case 1:
1667 return ldub_p(buf);
1668 case 2:
1669 return lduw_p(buf);
1670 case 4:
1671 return ldl_p(buf);
1672 default:
1673 abort();
1677 static void subpage_write(void *opaque, hwaddr addr,
1678 uint64_t value, unsigned len)
1680 subpage_t *subpage = opaque;
1681 uint8_t buf[4];
1683 #if defined(DEBUG_SUBPAGE)
1684 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1685 " value %"PRIx64"\n",
1686 __func__, subpage, len, addr, value);
1687 #endif
1688 switch (len) {
1689 case 1:
1690 stb_p(buf, value);
1691 break;
1692 case 2:
1693 stw_p(buf, value);
1694 break;
1695 case 4:
1696 stl_p(buf, value);
1697 break;
1698 default:
1699 abort();
1701 address_space_write(subpage->as, addr + subpage->base, buf, len);
1704 static bool subpage_accepts(void *opaque, hwaddr addr,
1705 unsigned len, bool is_write)
1707 subpage_t *subpage = opaque;
1708 #if defined(DEBUG_SUBPAGE)
1709 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1710 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1711 #endif
1713 return address_space_access_valid(subpage->as, addr + subpage->base,
1714 len, is_write);
1717 static const MemoryRegionOps subpage_ops = {
1718 .read = subpage_read,
1719 .write = subpage_write,
1720 .valid.accepts = subpage_accepts,
1721 .endianness = DEVICE_NATIVE_ENDIAN,
1724 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1725 uint16_t section)
1727 int idx, eidx;
1729 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1730 return -1;
1731 idx = SUBPAGE_IDX(start);
1732 eidx = SUBPAGE_IDX(end);
1733 #if defined(DEBUG_SUBPAGE)
1734 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1735 __func__, mmio, start, end, idx, eidx, section);
1736 #endif
1737 for (; idx <= eidx; idx++) {
1738 mmio->sub_section[idx] = section;
1741 return 0;
1744 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1746 subpage_t *mmio;
1748 mmio = g_malloc0(sizeof(subpage_t));
1750 mmio->as = as;
1751 mmio->base = base;
1752 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1753 "subpage", TARGET_PAGE_SIZE);
1754 mmio->iomem.subpage = true;
1755 #if defined(DEBUG_SUBPAGE)
1756 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1757 mmio, base, TARGET_PAGE_SIZE);
1758 #endif
1759 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1761 return mmio;
1764 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1766 MemoryRegionSection section = {
1767 .address_space = &address_space_memory,
1768 .mr = mr,
1769 .offset_within_address_space = 0,
1770 .offset_within_region = 0,
1771 .size = int128_2_64(),
1774 return phys_section_add(map, &section);
1777 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1779 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1782 static void io_mem_init(void)
1784 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1785 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1786 "unassigned", UINT64_MAX);
1787 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1788 "notdirty", UINT64_MAX);
1789 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1790 "watch", UINT64_MAX);
1793 static void mem_begin(MemoryListener *listener)
1795 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1796 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1797 uint16_t n;
1799 n = dummy_section(&d->map, &io_mem_unassigned);
1800 assert(n == PHYS_SECTION_UNASSIGNED);
1801 n = dummy_section(&d->map, &io_mem_notdirty);
1802 assert(n == PHYS_SECTION_NOTDIRTY);
1803 n = dummy_section(&d->map, &io_mem_rom);
1804 assert(n == PHYS_SECTION_ROM);
1805 n = dummy_section(&d->map, &io_mem_watch);
1806 assert(n == PHYS_SECTION_WATCH);
1808 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1809 d->as = as;
1810 as->next_dispatch = d;
1813 static void mem_commit(MemoryListener *listener)
1815 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1816 AddressSpaceDispatch *cur = as->dispatch;
1817 AddressSpaceDispatch *next = as->next_dispatch;
1819 phys_page_compact_all(next, next->map.nodes_nb);
1821 as->dispatch = next;
1823 if (cur) {
1824 phys_sections_free(&cur->map);
1825 g_free(cur);
1829 static void tcg_commit(MemoryListener *listener)
1831 CPUState *cpu;
1833 /* since each CPU stores ram addresses in its TLB cache, we must
1834 reset the modified entries */
1835 /* XXX: slow ! */
1836 CPU_FOREACH(cpu) {
1837 /* FIXME: Disentangle the cpu.h circular files deps so we can
1838 directly get the right CPU from listener. */
1839 if (cpu->tcg_as_listener != listener) {
1840 continue;
1842 tlb_flush(cpu, 1);
1846 static void core_log_global_start(MemoryListener *listener)
1848 cpu_physical_memory_set_dirty_tracking(true);
1851 static void core_log_global_stop(MemoryListener *listener)
1853 cpu_physical_memory_set_dirty_tracking(false);
1856 static MemoryListener core_memory_listener = {
1857 .log_global_start = core_log_global_start,
1858 .log_global_stop = core_log_global_stop,
1859 .priority = 1,
1862 void address_space_init_dispatch(AddressSpace *as)
1864 as->dispatch = NULL;
1865 as->dispatch_listener = (MemoryListener) {
1866 .begin = mem_begin,
1867 .commit = mem_commit,
1868 .region_add = mem_add,
1869 .region_nop = mem_add,
1870 .priority = 0,
1872 memory_listener_register(&as->dispatch_listener, as);
1875 void address_space_destroy_dispatch(AddressSpace *as)
1877 AddressSpaceDispatch *d = as->dispatch;
1879 memory_listener_unregister(&as->dispatch_listener);
1880 g_free(d);
1881 as->dispatch = NULL;
1884 static void memory_map_init(void)
1886 system_memory = g_malloc(sizeof(*system_memory));
1888 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1889 address_space_init(&address_space_memory, system_memory, "memory");
1891 system_io = g_malloc(sizeof(*system_io));
1892 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1893 65536);
1894 address_space_init(&address_space_io, system_io, "I/O");
1896 memory_listener_register(&core_memory_listener, &address_space_memory);
1899 MemoryRegion *get_system_memory(void)
1901 return system_memory;
1904 MemoryRegion *get_system_io(void)
1906 return system_io;
1909 #endif /* !defined(CONFIG_USER_ONLY) */
1911 /* physical memory access (slow version, mainly for debug) */
1912 #if defined(CONFIG_USER_ONLY)
1913 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1914 uint8_t *buf, int len, int is_write)
1916 int l, flags;
1917 target_ulong page;
1918 void * p;
1920 while (len > 0) {
1921 page = addr & TARGET_PAGE_MASK;
1922 l = (page + TARGET_PAGE_SIZE) - addr;
1923 if (l > len)
1924 l = len;
1925 flags = page_get_flags(page);
1926 if (!(flags & PAGE_VALID))
1927 return -1;
1928 if (is_write) {
1929 if (!(flags & PAGE_WRITE))
1930 return -1;
1931 /* XXX: this code should not depend on lock_user */
1932 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1933 return -1;
1934 memcpy(p, buf, l);
1935 unlock_user(p, addr, l);
1936 } else {
1937 if (!(flags & PAGE_READ))
1938 return -1;
1939 /* XXX: this code should not depend on lock_user */
1940 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1941 return -1;
1942 memcpy(buf, p, l);
1943 unlock_user(p, addr, 0);
1945 len -= l;
1946 buf += l;
1947 addr += l;
1949 return 0;
1952 #else
1954 static void invalidate_and_set_dirty(hwaddr addr,
1955 hwaddr length)
1957 if (cpu_physical_memory_is_clean(addr)) {
1958 /* invalidate code */
1959 tb_invalidate_phys_page_range(addr, addr + length, 0);
1960 /* set dirty bit */
1961 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1962 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1964 xen_modified_memory(addr, length);
1967 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1969 unsigned access_size_max = mr->ops->valid.max_access_size;
1971 /* Regions are assumed to support 1-4 byte accesses unless
1972 otherwise specified. */
1973 if (access_size_max == 0) {
1974 access_size_max = 4;
1977 /* Bound the maximum access by the alignment of the address. */
1978 if (!mr->ops->impl.unaligned) {
1979 unsigned align_size_max = addr & -addr;
1980 if (align_size_max != 0 && align_size_max < access_size_max) {
1981 access_size_max = align_size_max;
1985 /* Don't attempt accesses larger than the maximum. */
1986 if (l > access_size_max) {
1987 l = access_size_max;
1989 if (l & (l - 1)) {
1990 l = 1 << (qemu_fls(l) - 1);
1993 return l;
1996 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1997 int len, bool is_write)
1999 hwaddr l;
2000 uint8_t *ptr;
2001 uint64_t val;
2002 hwaddr addr1;
2003 MemoryRegion *mr;
2004 bool error = false;
2006 while (len > 0) {
2007 l = len;
2008 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2010 if (is_write) {
2011 if (!memory_access_is_direct(mr, is_write)) {
2012 l = memory_access_size(mr, l, addr1);
2013 /* XXX: could force current_cpu to NULL to avoid
2014 potential bugs */
2015 switch (l) {
2016 case 8:
2017 /* 64 bit write access */
2018 val = ldq_p(buf);
2019 error |= io_mem_write(mr, addr1, val, 8);
2020 break;
2021 case 4:
2022 /* 32 bit write access */
2023 val = ldl_p(buf);
2024 error |= io_mem_write(mr, addr1, val, 4);
2025 break;
2026 case 2:
2027 /* 16 bit write access */
2028 val = lduw_p(buf);
2029 error |= io_mem_write(mr, addr1, val, 2);
2030 break;
2031 case 1:
2032 /* 8 bit write access */
2033 val = ldub_p(buf);
2034 error |= io_mem_write(mr, addr1, val, 1);
2035 break;
2036 default:
2037 abort();
2039 } else {
2040 addr1 += memory_region_get_ram_addr(mr);
2041 /* RAM case */
2042 ptr = qemu_get_ram_ptr(addr1);
2043 memcpy(ptr, buf, l);
2044 invalidate_and_set_dirty(addr1, l);
2046 } else {
2047 if (!memory_access_is_direct(mr, is_write)) {
2048 /* I/O case */
2049 l = memory_access_size(mr, l, addr1);
2050 switch (l) {
2051 case 8:
2052 /* 64 bit read access */
2053 error |= io_mem_read(mr, addr1, &val, 8);
2054 stq_p(buf, val);
2055 break;
2056 case 4:
2057 /* 32 bit read access */
2058 error |= io_mem_read(mr, addr1, &val, 4);
2059 stl_p(buf, val);
2060 break;
2061 case 2:
2062 /* 16 bit read access */
2063 error |= io_mem_read(mr, addr1, &val, 2);
2064 stw_p(buf, val);
2065 break;
2066 case 1:
2067 /* 8 bit read access */
2068 error |= io_mem_read(mr, addr1, &val, 1);
2069 stb_p(buf, val);
2070 break;
2071 default:
2072 abort();
2074 } else {
2075 /* RAM case */
2076 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2077 memcpy(buf, ptr, l);
2080 len -= l;
2081 buf += l;
2082 addr += l;
2085 return error;
2088 bool address_space_write(AddressSpace *as, hwaddr addr,
2089 const uint8_t *buf, int len)
2091 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2094 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2096 return address_space_rw(as, addr, buf, len, false);
2100 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2101 int len, int is_write)
2103 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE = 1,    /* flush the host icache for the range */
};
2111 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2112 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2114 hwaddr l;
2115 uint8_t *ptr;
2116 hwaddr addr1;
2117 MemoryRegion *mr;
2119 while (len > 0) {
2120 l = len;
2121 mr = address_space_translate(as, addr, &addr1, &l, true);
2123 if (!(memory_region_is_ram(mr) ||
2124 memory_region_is_romd(mr))) {
2125 /* do nothing */
2126 } else {
2127 addr1 += memory_region_get_ram_addr(mr);
2128 /* ROM/RAM case */
2129 ptr = qemu_get_ram_ptr(addr1);
2130 switch (type) {
2131 case WRITE_DATA:
2132 memcpy(ptr, buf, l);
2133 invalidate_and_set_dirty(addr1, l);
2134 break;
2135 case FLUSH_CACHE:
2136 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2137 break;
2140 len -= l;
2141 buf += l;
2142 addr += l;
2146 /* used for ROM loading : can write in RAM and ROM */
2147 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2148 const uint8_t *buf, int len)
2150 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2153 void cpu_flush_icache_range(hwaddr start, int len)
2156 * This function should do the same thing as an icache flush that was
2157 * triggered from within the guest. For TCG we are always cache coherent,
2158 * so there is no need to flush anything. For KVM / Xen we need to flush
2159 * the host's instruction cache at least.
2161 if (tcg_enabled()) {
2162 return;
2165 cpu_physical_memory_write_rom_internal(&address_space_memory,
2166 start, NULL, len, FLUSH_CACHE);
2169 typedef struct {
2170 MemoryRegion *mr;
2171 void *buffer;
2172 hwaddr addr;
2173 hwaddr len;
2174 } BounceBuffer;
2176 static BounceBuffer bounce;
2178 typedef struct MapClient {
2179 void *opaque;
2180 void (*callback)(void *opaque);
2181 QLIST_ENTRY(MapClient) link;
2182 } MapClient;
2184 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2185 = QLIST_HEAD_INITIALIZER(map_client_list);
2187 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2189 MapClient *client = g_malloc(sizeof(*client));
2191 client->opaque = opaque;
2192 client->callback = callback;
2193 QLIST_INSERT_HEAD(&map_client_list, client, link);
2194 return client;
2197 static void cpu_unregister_map_client(void *_client)
2199 MapClient *client = (MapClient *)_client;
2201 QLIST_REMOVE(client, link);
2202 g_free(client);
2205 static void cpu_notify_map_clients(void)
2207 MapClient *client;
2209 while (!QLIST_EMPTY(&map_client_list)) {
2210 client = QLIST_FIRST(&map_client_list);
2211 client->callback(client->opaque);
2212 cpu_unregister_map_client(client);
2216 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2218 MemoryRegion *mr;
2219 hwaddr l, xlat;
2221 while (len > 0) {
2222 l = len;
2223 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2224 if (!memory_access_is_direct(mr, is_write)) {
2225 l = memory_access_size(mr, l, addr);
2226 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2227 return false;
2231 len -= l;
2232 addr += l;
2234 return true;
2237 /* Map a physical memory region into a host virtual address.
2238 * May map a subset of the requested range, given by and returned in *plen.
2239 * May return NULL if resources needed to perform the mapping are exhausted.
2240 * Use only for reads OR writes - not for read-modify-write operations.
2241 * Use cpu_register_map_client() to know when retrying the map operation is
2242 * likely to succeed.
2244 void *address_space_map(AddressSpace *as,
2245 hwaddr addr,
2246 hwaddr *plen,
2247 bool is_write)
2249 hwaddr len = *plen;
2250 hwaddr done = 0;
2251 hwaddr l, xlat, base;
2252 MemoryRegion *mr, *this_mr;
2253 ram_addr_t raddr;
2255 if (len == 0) {
2256 return NULL;
2259 l = len;
2260 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2261 if (!memory_access_is_direct(mr, is_write)) {
2262 if (bounce.buffer) {
2263 return NULL;
2265 /* Avoid unbounded allocations */
2266 l = MIN(l, TARGET_PAGE_SIZE);
2267 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2268 bounce.addr = addr;
2269 bounce.len = l;
2271 memory_region_ref(mr);
2272 bounce.mr = mr;
2273 if (!is_write) {
2274 address_space_read(as, addr, bounce.buffer, l);
2277 *plen = l;
2278 return bounce.buffer;
2281 base = xlat;
2282 raddr = memory_region_get_ram_addr(mr);
2284 for (;;) {
2285 len -= l;
2286 addr += l;
2287 done += l;
2288 if (len == 0) {
2289 break;
2292 l = len;
2293 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2294 if (this_mr != mr || xlat != base + done) {
2295 break;
2299 memory_region_ref(mr);
2300 *plen = done;
2301 return qemu_ram_ptr_length(raddr + base, plen);
2304 /* Unmaps a memory region previously mapped by address_space_map().
2305 * Will also mark the memory as dirty if is_write == 1. access_len gives
2306 * the amount of memory that was actually read or written by the caller.
2308 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2309 int is_write, hwaddr access_len)
2311 if (buffer != bounce.buffer) {
2312 MemoryRegion *mr;
2313 ram_addr_t addr1;
2315 mr = qemu_ram_addr_from_host(buffer, &addr1);
2316 assert(mr != NULL);
2317 if (is_write) {
2318 while (access_len) {
2319 unsigned l;
2320 l = TARGET_PAGE_SIZE;
2321 if (l > access_len)
2322 l = access_len;
2323 invalidate_and_set_dirty(addr1, l);
2324 addr1 += l;
2325 access_len -= l;
2328 if (xen_enabled()) {
2329 xen_invalidate_map_cache_entry(buffer);
2331 memory_region_unref(mr);
2332 return;
2334 if (is_write) {
2335 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2337 qemu_vfree(bounce.buffer);
2338 bounce.buffer = NULL;
2339 memory_region_unref(bounce.mr);
2340 cpu_notify_map_clients();
2343 void *cpu_physical_memory_map(hwaddr addr,
2344 hwaddr *plen,
2345 int is_write)
2347 return address_space_map(&address_space_memory, addr, plen, is_write);
2350 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2351 int is_write, hwaddr access_len)
2353 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2356 /* warning: addr must be aligned */
2357 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2358 enum device_endian endian)
2360 uint8_t *ptr;
2361 uint64_t val;
2362 MemoryRegion *mr;
2363 hwaddr l = 4;
2364 hwaddr addr1;
2366 mr = address_space_translate(as, addr, &addr1, &l, false);
2367 if (l < 4 || !memory_access_is_direct(mr, false)) {
2368 /* I/O case */
2369 io_mem_read(mr, addr1, &val, 4);
2370 #if defined(TARGET_WORDS_BIGENDIAN)
2371 if (endian == DEVICE_LITTLE_ENDIAN) {
2372 val = bswap32(val);
2374 #else
2375 if (endian == DEVICE_BIG_ENDIAN) {
2376 val = bswap32(val);
2378 #endif
2379 } else {
2380 /* RAM case */
2381 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2382 & TARGET_PAGE_MASK)
2383 + addr1);
2384 switch (endian) {
2385 case DEVICE_LITTLE_ENDIAN:
2386 val = ldl_le_p(ptr);
2387 break;
2388 case DEVICE_BIG_ENDIAN:
2389 val = ldl_be_p(ptr);
2390 break;
2391 default:
2392 val = ldl_p(ptr);
2393 break;
2396 return val;
2399 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2401 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2404 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2406 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2409 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2411 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2414 /* warning: addr must be aligned */
2415 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2416 enum device_endian endian)
2418 uint8_t *ptr;
2419 uint64_t val;
2420 MemoryRegion *mr;
2421 hwaddr l = 8;
2422 hwaddr addr1;
2424 mr = address_space_translate(as, addr, &addr1, &l,
2425 false);
2426 if (l < 8 || !memory_access_is_direct(mr, false)) {
2427 /* I/O case */
2428 io_mem_read(mr, addr1, &val, 8);
2429 #if defined(TARGET_WORDS_BIGENDIAN)
2430 if (endian == DEVICE_LITTLE_ENDIAN) {
2431 val = bswap64(val);
2433 #else
2434 if (endian == DEVICE_BIG_ENDIAN) {
2435 val = bswap64(val);
2437 #endif
2438 } else {
2439 /* RAM case */
2440 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2441 & TARGET_PAGE_MASK)
2442 + addr1);
2443 switch (endian) {
2444 case DEVICE_LITTLE_ENDIAN:
2445 val = ldq_le_p(ptr);
2446 break;
2447 case DEVICE_BIG_ENDIAN:
2448 val = ldq_be_p(ptr);
2449 break;
2450 default:
2451 val = ldq_p(ptr);
2452 break;
2455 return val;
2458 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2460 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2463 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2465 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2468 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2470 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2473 /* XXX: optimize */
2474 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2476 uint8_t val;
2477 address_space_rw(as, addr, &val, 1, 0);
2478 return val;
2481 /* warning: addr must be aligned */
2482 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2483 enum device_endian endian)
2485 uint8_t *ptr;
2486 uint64_t val;
2487 MemoryRegion *mr;
2488 hwaddr l = 2;
2489 hwaddr addr1;
2491 mr = address_space_translate(as, addr, &addr1, &l,
2492 false);
2493 if (l < 2 || !memory_access_is_direct(mr, false)) {
2494 /* I/O case */
2495 io_mem_read(mr, addr1, &val, 2);
2496 #if defined(TARGET_WORDS_BIGENDIAN)
2497 if (endian == DEVICE_LITTLE_ENDIAN) {
2498 val = bswap16(val);
2500 #else
2501 if (endian == DEVICE_BIG_ENDIAN) {
2502 val = bswap16(val);
2504 #endif
2505 } else {
2506 /* RAM case */
2507 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2508 & TARGET_PAGE_MASK)
2509 + addr1);
2510 switch (endian) {
2511 case DEVICE_LITTLE_ENDIAN:
2512 val = lduw_le_p(ptr);
2513 break;
2514 case DEVICE_BIG_ENDIAN:
2515 val = lduw_be_p(ptr);
2516 break;
2517 default:
2518 val = lduw_p(ptr);
2519 break;
2522 return val;
2525 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2527 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2530 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2532 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2535 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2537 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2540 /* warning: addr must be aligned. The ram page is not masked as dirty
2541 and the code inside is not invalidated. It is useful if the dirty
2542 bits are used to track modified PTEs */
2543 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2545 uint8_t *ptr;
2546 MemoryRegion *mr;
2547 hwaddr l = 4;
2548 hwaddr addr1;
2550 mr = address_space_translate(as, addr, &addr1, &l,
2551 true);
2552 if (l < 4 || !memory_access_is_direct(mr, true)) {
2553 io_mem_write(mr, addr1, val, 4);
2554 } else {
2555 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2556 ptr = qemu_get_ram_ptr(addr1);
2557 stl_p(ptr, val);
2559 if (unlikely(in_migration)) {
2560 if (cpu_physical_memory_is_clean(addr1)) {
2561 /* invalidate code */
2562 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2563 /* set dirty bit */
2564 cpu_physical_memory_set_dirty_flag(addr1,
2565 DIRTY_MEMORY_MIGRATION);
2566 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2572 /* warning: addr must be aligned */
2573 static inline void stl_phys_internal(AddressSpace *as,
2574 hwaddr addr, uint32_t val,
2575 enum device_endian endian)
2577 uint8_t *ptr;
2578 MemoryRegion *mr;
2579 hwaddr l = 4;
2580 hwaddr addr1;
2582 mr = address_space_translate(as, addr, &addr1, &l,
2583 true);
2584 if (l < 4 || !memory_access_is_direct(mr, true)) {
2585 #if defined(TARGET_WORDS_BIGENDIAN)
2586 if (endian == DEVICE_LITTLE_ENDIAN) {
2587 val = bswap32(val);
2589 #else
2590 if (endian == DEVICE_BIG_ENDIAN) {
2591 val = bswap32(val);
2593 #endif
2594 io_mem_write(mr, addr1, val, 4);
2595 } else {
2596 /* RAM case */
2597 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2598 ptr = qemu_get_ram_ptr(addr1);
2599 switch (endian) {
2600 case DEVICE_LITTLE_ENDIAN:
2601 stl_le_p(ptr, val);
2602 break;
2603 case DEVICE_BIG_ENDIAN:
2604 stl_be_p(ptr, val);
2605 break;
2606 default:
2607 stl_p(ptr, val);
2608 break;
2610 invalidate_and_set_dirty(addr1, 4);
2614 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2616 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2619 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2621 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2624 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2626 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2629 /* XXX: optimize */
2630 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2632 uint8_t v = val;
2633 address_space_rw(as, addr, &v, 1, 1);
2636 /* warning: addr must be aligned */
2637 static inline void stw_phys_internal(AddressSpace *as,
2638 hwaddr addr, uint32_t val,
2639 enum device_endian endian)
2641 uint8_t *ptr;
2642 MemoryRegion *mr;
2643 hwaddr l = 2;
2644 hwaddr addr1;
2646 mr = address_space_translate(as, addr, &addr1, &l, true);
2647 if (l < 2 || !memory_access_is_direct(mr, true)) {
2648 #if defined(TARGET_WORDS_BIGENDIAN)
2649 if (endian == DEVICE_LITTLE_ENDIAN) {
2650 val = bswap16(val);
2652 #else
2653 if (endian == DEVICE_BIG_ENDIAN) {
2654 val = bswap16(val);
2656 #endif
2657 io_mem_write(mr, addr1, val, 2);
2658 } else {
2659 /* RAM case */
2660 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2661 ptr = qemu_get_ram_ptr(addr1);
2662 switch (endian) {
2663 case DEVICE_LITTLE_ENDIAN:
2664 stw_le_p(ptr, val);
2665 break;
2666 case DEVICE_BIG_ENDIAN:
2667 stw_be_p(ptr, val);
2668 break;
2669 default:
2670 stw_p(ptr, val);
2671 break;
2673 invalidate_and_set_dirty(addr1, 2);
2677 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2679 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2682 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2684 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2687 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2689 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2692 /* XXX: optimize */
2693 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2695 val = tswap64(val);
2696 address_space_rw(as, addr, (void *) &val, 8, 1);
2699 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2701 val = cpu_to_le64(val);
2702 address_space_rw(as, addr, (void *) &val, 8, 1);
2705 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2707 val = cpu_to_be64(val);
2708 address_space_rw(as, addr, (void *) &val, 8, 1);
2711 /* virtual memory access for debug (includes writing to ROM) */
2712 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2713 uint8_t *buf, int len, int is_write)
2715 int l;
2716 hwaddr phys_addr;
2717 target_ulong page;
2719 while (len > 0) {
2720 page = addr & TARGET_PAGE_MASK;
2721 phys_addr = cpu_get_phys_page_debug(cpu, page);
2722 /* if no physical page mapped, return an error */
2723 if (phys_addr == -1)
2724 return -1;
2725 l = (page + TARGET_PAGE_SIZE) - addr;
2726 if (l > len)
2727 l = len;
2728 phys_addr += (addr & ~TARGET_PAGE_MASK);
2729 if (is_write) {
2730 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2731 } else {
2732 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2734 len -= l;
2735 buf += l;
2736 addr += l;
2738 return 0;
2740 #endif
2742 #if !defined(CONFIG_USER_ONLY)
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at compile time,
 * false otherwise.
 */
/* Prototype placed directly ahead of the definition; presumably to keep
 * missing-prototype warnings quiet -- TODO confirm. */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
2758 #endif
2760 #ifndef CONFIG_USER_ONLY
2761 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2763 MemoryRegion*mr;
2764 hwaddr l = 1;
2766 mr = address_space_translate(&address_space_memory,
2767 phys_addr, &phys_addr, &l, false);
2769 return !(memory_region_is_ram(mr) ||
2770 memory_region_is_romd(mr));
2773 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2775 RAMBlock *block;
2777 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2778 func(block->host, block->offset, block->length, opaque);
2781 #endif