block/nfs: refuse readahead if cache.direct is on
[qemu/kevin.git] / exec.c
blob0122ef76de5d8331d6892e5f60657469294eca03
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
121 #define P_L2_BITS 9
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 MemoryRegionSection *mru_section;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
147 PhysPageMap map;
148 AddressSpace *as;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
153 MemoryRegion iomem;
154 AddressSpace *as;
155 hwaddr base;
156 uint16_t sub_section[TARGET_PAGE_SIZE];
157 } subpage_t;
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
178 CPUState *cpu;
179 AddressSpace *as;
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
184 #endif
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
193 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
197 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
199 unsigned i;
200 uint32_t ret;
201 PhysPageEntry e;
202 PhysPageEntry *p;
204 ret = map->nodes_nb++;
205 p = map->nodes[ret];
206 assert(ret != PHYS_MAP_NODE_NIL);
207 assert(ret != map->nodes_nb_alloc);
209 e.skip = leaf ? 0 : 1;
210 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
211 for (i = 0; i < P_L2_SIZE; ++i) {
212 memcpy(&p[i], &e, sizeof(e));
214 return ret;
217 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
218 hwaddr *index, hwaddr *nb, uint16_t leaf,
219 int level)
221 PhysPageEntry *p;
222 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
224 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
225 lp->ptr = phys_map_node_alloc(map, level == 0);
227 p = map->nodes[lp->ptr];
228 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
230 while (*nb && lp < &p[P_L2_SIZE]) {
231 if ((*index & (step - 1)) == 0 && *nb >= step) {
232 lp->skip = 0;
233 lp->ptr = leaf;
234 *index += step;
235 *nb -= step;
236 } else {
237 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
239 ++lp;
243 static void phys_page_set(AddressSpaceDispatch *d,
244 hwaddr index, hwaddr nb,
245 uint16_t leaf)
247 /* Wildly overreserve - it doesn't matter much. */
248 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
250 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
253 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
254 * and update our entry so we can skip it and go directly to the destination.
256 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
258 unsigned valid_ptr = P_L2_SIZE;
259 int valid = 0;
260 PhysPageEntry *p;
261 int i;
263 if (lp->ptr == PHYS_MAP_NODE_NIL) {
264 return;
267 p = nodes[lp->ptr];
268 for (i = 0; i < P_L2_SIZE; i++) {
269 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
270 continue;
273 valid_ptr = i;
274 valid++;
275 if (p[i].skip) {
276 phys_page_compact(&p[i], nodes, compacted);
280 /* We can only compress if there's only one child. */
281 if (valid != 1) {
282 return;
285 assert(valid_ptr < P_L2_SIZE);
287 /* Don't compress if it won't fit in the # of bits we have. */
288 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
289 return;
292 lp->ptr = p[valid_ptr].ptr;
293 if (!p[valid_ptr].skip) {
294 /* If our only child is a leaf, make this a leaf. */
295 /* By design, we should have made this node a leaf to begin with so we
296 * should never reach here.
297 * But since it's so simple to handle this, let's do it just in case we
298 * change this rule.
300 lp->skip = 0;
301 } else {
302 lp->skip += p[valid_ptr].skip;
306 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
308 DECLARE_BITMAP(compacted, nodes_nb);
310 if (d->phys_map.skip) {
311 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
315 static inline bool section_covers_addr(const MemoryRegionSection *section,
316 hwaddr addr)
318 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
319 * the section must cover the entire address space.
321 return section->size.hi ||
322 range_covers_byte(section->offset_within_address_space,
323 section->size.lo, addr);
326 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
327 Node *nodes, MemoryRegionSection *sections)
329 PhysPageEntry *p;
330 hwaddr index = addr >> TARGET_PAGE_BITS;
331 int i;
333 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
334 if (lp.ptr == PHYS_MAP_NODE_NIL) {
335 return &sections[PHYS_SECTION_UNASSIGNED];
337 p = nodes[lp.ptr];
338 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
341 if (section_covers_addr(&sections[lp.ptr], addr)) {
342 return &sections[lp.ptr];
343 } else {
344 return &sections[PHYS_SECTION_UNASSIGNED];
348 bool memory_region_is_unassigned(MemoryRegion *mr)
350 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
351 && mr != &io_mem_watch;
354 /* Called from RCU critical section */
355 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
356 hwaddr addr,
357 bool resolve_subpage)
359 MemoryRegionSection *section = atomic_read(&d->mru_section);
360 subpage_t *subpage;
361 bool update;
363 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
364 section_covers_addr(section, addr)) {
365 update = false;
366 } else {
367 section = phys_page_find(d->phys_map, addr, d->map.nodes,
368 d->map.sections);
369 update = true;
371 if (resolve_subpage && section->mr->subpage) {
372 subpage = container_of(section->mr, subpage_t, iomem);
373 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
375 if (update) {
376 atomic_set(&d->mru_section, section);
378 return section;
381 /* Called from RCU critical section */
382 static MemoryRegionSection *
383 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
384 hwaddr *plen, bool resolve_subpage)
386 MemoryRegionSection *section;
387 MemoryRegion *mr;
388 Int128 diff;
390 section = address_space_lookup_region(d, addr, resolve_subpage);
391 /* Compute offset within MemoryRegionSection */
392 addr -= section->offset_within_address_space;
394 /* Compute offset within MemoryRegion */
395 *xlat = addr + section->offset_within_region;
397 mr = section->mr;
399 /* MMIO registers can be expected to perform full-width accesses based only
400 * on their address, without considering adjacent registers that could
401 * decode to completely different MemoryRegions. When such registers
402 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
403 * regions overlap wildly. For this reason we cannot clamp the accesses
404 * here.
406 * If the length is small (as is the case for address_space_ldl/stl),
407 * everything works fine. If the incoming length is large, however,
408 * the caller really has to do the clamping through memory_access_size.
410 if (memory_region_is_ram(mr)) {
411 diff = int128_sub(section->size, int128_make64(addr));
412 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
414 return section;
417 /* Called from RCU critical section */
418 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
419 hwaddr *xlat, hwaddr *plen,
420 bool is_write)
422 IOMMUTLBEntry iotlb;
423 MemoryRegionSection *section;
424 MemoryRegion *mr;
426 for (;;) {
427 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
428 section = address_space_translate_internal(d, addr, &addr, plen, true);
429 mr = section->mr;
431 if (!mr->iommu_ops) {
432 break;
435 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
436 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
437 | (addr & iotlb.addr_mask));
438 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
439 if (!(iotlb.perm & (1 << is_write))) {
440 mr = &io_mem_unassigned;
441 break;
444 as = iotlb.target_as;
447 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
448 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
449 *plen = MIN(page, *plen);
452 *xlat = addr;
453 return mr;
456 /* Called from RCU critical section */
457 MemoryRegionSection *
458 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
459 hwaddr *xlat, hwaddr *plen)
461 MemoryRegionSection *section;
462 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
464 section = address_space_translate_internal(d, addr, xlat, plen, false);
466 assert(!section->mr->iommu_ops);
467 return section;
469 #endif
471 #if !defined(CONFIG_USER_ONLY)
473 static int cpu_common_post_load(void *opaque, int version_id)
475 CPUState *cpu = opaque;
477 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
478 version_id is increased. */
479 cpu->interrupt_request &= ~0x01;
480 tlb_flush(cpu, 1);
482 return 0;
485 static int cpu_common_pre_load(void *opaque)
487 CPUState *cpu = opaque;
489 cpu->exception_index = -1;
491 return 0;
494 static bool cpu_common_exception_index_needed(void *opaque)
496 CPUState *cpu = opaque;
498 return tcg_enabled() && cpu->exception_index != -1;
501 static const VMStateDescription vmstate_cpu_common_exception_index = {
502 .name = "cpu_common/exception_index",
503 .version_id = 1,
504 .minimum_version_id = 1,
505 .needed = cpu_common_exception_index_needed,
506 .fields = (VMStateField[]) {
507 VMSTATE_INT32(exception_index, CPUState),
508 VMSTATE_END_OF_LIST()
512 static bool cpu_common_crash_occurred_needed(void *opaque)
514 CPUState *cpu = opaque;
516 return cpu->crash_occurred;
519 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
520 .name = "cpu_common/crash_occurred",
521 .version_id = 1,
522 .minimum_version_id = 1,
523 .needed = cpu_common_crash_occurred_needed,
524 .fields = (VMStateField[]) {
525 VMSTATE_BOOL(crash_occurred, CPUState),
526 VMSTATE_END_OF_LIST()
530 const VMStateDescription vmstate_cpu_common = {
531 .name = "cpu_common",
532 .version_id = 1,
533 .minimum_version_id = 1,
534 .pre_load = cpu_common_pre_load,
535 .post_load = cpu_common_post_load,
536 .fields = (VMStateField[]) {
537 VMSTATE_UINT32(halted, CPUState),
538 VMSTATE_UINT32(interrupt_request, CPUState),
539 VMSTATE_END_OF_LIST()
541 .subsections = (const VMStateDescription*[]) {
542 &vmstate_cpu_common_exception_index,
543 &vmstate_cpu_common_crash_occurred,
544 NULL
548 #endif
550 CPUState *qemu_get_cpu(int index)
552 CPUState *cpu;
554 CPU_FOREACH(cpu) {
555 if (cpu->cpu_index == index) {
556 return cpu;
560 return NULL;
563 #if !defined(CONFIG_USER_ONLY)
564 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
566 CPUAddressSpace *newas;
568 /* Target code should have set num_ases before calling us */
569 assert(asidx < cpu->num_ases);
571 if (asidx == 0) {
572 /* address space 0 gets the convenience alias */
573 cpu->as = as;
576 /* KVM cannot currently support multiple address spaces. */
577 assert(asidx == 0 || !kvm_enabled());
579 if (!cpu->cpu_ases) {
580 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
583 newas = &cpu->cpu_ases[asidx];
584 newas->cpu = cpu;
585 newas->as = as;
586 if (tcg_enabled()) {
587 newas->tcg_as_listener.commit = tcg_commit;
588 memory_listener_register(&newas->tcg_as_listener, as);
592 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
594 /* Return the AddressSpace corresponding to the specified index */
595 return cpu->cpu_ases[asidx].as;
597 #endif
599 #ifndef CONFIG_USER_ONLY
600 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
602 static int cpu_get_free_index(Error **errp)
604 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
606 if (cpu >= MAX_CPUMASK_BITS) {
607 error_setg(errp, "Trying to use more CPUs than max of %d",
608 MAX_CPUMASK_BITS);
609 return -1;
612 bitmap_set(cpu_index_map, cpu, 1);
613 return cpu;
616 static void cpu_release_index(CPUState *cpu)
618 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
620 #else
622 static int cpu_get_free_index(Error **errp)
624 CPUState *some_cpu;
625 int cpu_index = 0;
627 CPU_FOREACH(some_cpu) {
628 cpu_index++;
630 return cpu_index;
633 static void cpu_release_index(CPUState *cpu)
635 return;
637 #endif
639 void cpu_exec_exit(CPUState *cpu)
641 CPUClass *cc = CPU_GET_CLASS(cpu);
643 #if defined(CONFIG_USER_ONLY)
644 cpu_list_lock();
645 #endif
646 if (cpu->cpu_index == -1) {
647 /* cpu_index was never allocated by this @cpu or was already freed. */
648 #if defined(CONFIG_USER_ONLY)
649 cpu_list_unlock();
650 #endif
651 return;
654 QTAILQ_REMOVE(&cpus, cpu, node);
655 cpu_release_index(cpu);
656 cpu->cpu_index = -1;
657 #if defined(CONFIG_USER_ONLY)
658 cpu_list_unlock();
659 #endif
661 if (cc->vmsd != NULL) {
662 vmstate_unregister(NULL, cc->vmsd, cpu);
664 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
665 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
669 void cpu_exec_init(CPUState *cpu, Error **errp)
671 CPUClass *cc = CPU_GET_CLASS(cpu);
672 Error *local_err = NULL;
674 cpu->as = NULL;
675 cpu->num_ases = 0;
677 #ifndef CONFIG_USER_ONLY
678 cpu->thread_id = qemu_get_thread_id();
680 /* This is a softmmu CPU object, so create a property for it
681 * so users can wire up its memory. (This can't go in qom/cpu.c
682 * because that file is compiled only once for both user-mode
683 * and system builds.) The default if no link is set up is to use
684 * the system address space.
686 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
687 (Object **)&cpu->memory,
688 qdev_prop_allow_set_link_before_realize,
689 OBJ_PROP_LINK_UNREF_ON_RELEASE,
690 &error_abort);
691 cpu->memory = system_memory;
692 object_ref(OBJECT(cpu->memory));
693 #endif
695 #if defined(CONFIG_USER_ONLY)
696 cpu_list_lock();
697 #endif
698 cpu->cpu_index = cpu_get_free_index(&local_err);
699 if (local_err) {
700 error_propagate(errp, local_err);
701 #if defined(CONFIG_USER_ONLY)
702 cpu_list_unlock();
703 #endif
704 return;
706 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
707 #if defined(CONFIG_USER_ONLY)
708 (void) cc;
709 cpu_list_unlock();
710 #else
711 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
712 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
714 if (cc->vmsd != NULL) {
715 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
717 #endif
720 #if defined(CONFIG_USER_ONLY)
721 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
723 tb_invalidate_phys_page_range(pc, pc + 1, 0);
725 #else
726 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
728 MemTxAttrs attrs;
729 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
730 int asidx = cpu_asidx_from_attrs(cpu, attrs);
731 if (phys != -1) {
732 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
733 phys | (pc & ~TARGET_PAGE_MASK));
736 #endif
738 #if defined(CONFIG_USER_ONLY)
739 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
744 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
745 int flags)
747 return -ENOSYS;
750 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
754 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
755 int flags, CPUWatchpoint **watchpoint)
757 return -ENOSYS;
759 #else
760 /* Add a watchpoint. */
761 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
762 int flags, CPUWatchpoint **watchpoint)
764 CPUWatchpoint *wp;
766 /* forbid ranges which are empty or run off the end of the address space */
767 if (len == 0 || (addr + len - 1) < addr) {
768 error_report("tried to set invalid watchpoint at %"
769 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
770 return -EINVAL;
772 wp = g_malloc(sizeof(*wp));
774 wp->vaddr = addr;
775 wp->len = len;
776 wp->flags = flags;
778 /* keep all GDB-injected watchpoints in front */
779 if (flags & BP_GDB) {
780 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
781 } else {
782 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
785 tlb_flush_page(cpu, addr);
787 if (watchpoint)
788 *watchpoint = wp;
789 return 0;
792 /* Remove a specific watchpoint. */
793 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
794 int flags)
796 CPUWatchpoint *wp;
798 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
799 if (addr == wp->vaddr && len == wp->len
800 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
801 cpu_watchpoint_remove_by_ref(cpu, wp);
802 return 0;
805 return -ENOENT;
808 /* Remove a specific watchpoint by reference. */
809 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
811 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
813 tlb_flush_page(cpu, watchpoint->vaddr);
815 g_free(watchpoint);
818 /* Remove all matching watchpoints. */
819 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
821 CPUWatchpoint *wp, *next;
823 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
824 if (wp->flags & mask) {
825 cpu_watchpoint_remove_by_ref(cpu, wp);
830 /* Return true if this watchpoint address matches the specified
831 * access (ie the address range covered by the watchpoint overlaps
832 * partially or completely with the address range covered by the
833 * access).
835 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
836 vaddr addr,
837 vaddr len)
839 /* We know the lengths are non-zero, but a little caution is
840 * required to avoid errors in the case where the range ends
841 * exactly at the top of the address space and so addr + len
842 * wraps round to zero.
844 vaddr wpend = wp->vaddr + wp->len - 1;
845 vaddr addrend = addr + len - 1;
847 return !(addr > wpend || wp->vaddr > addrend);
850 #endif
852 /* Add a breakpoint. */
853 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
854 CPUBreakpoint **breakpoint)
856 CPUBreakpoint *bp;
858 bp = g_malloc(sizeof(*bp));
860 bp->pc = pc;
861 bp->flags = flags;
863 /* keep all GDB-injected breakpoints in front */
864 if (flags & BP_GDB) {
865 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
866 } else {
867 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
870 breakpoint_invalidate(cpu, pc);
872 if (breakpoint) {
873 *breakpoint = bp;
875 return 0;
878 /* Remove a specific breakpoint. */
879 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
881 CPUBreakpoint *bp;
883 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
884 if (bp->pc == pc && bp->flags == flags) {
885 cpu_breakpoint_remove_by_ref(cpu, bp);
886 return 0;
889 return -ENOENT;
892 /* Remove a specific breakpoint by reference. */
893 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
895 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
897 breakpoint_invalidate(cpu, breakpoint->pc);
899 g_free(breakpoint);
902 /* Remove all matching breakpoints. */
903 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
905 CPUBreakpoint *bp, *next;
907 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
908 if (bp->flags & mask) {
909 cpu_breakpoint_remove_by_ref(cpu, bp);
914 /* enable or disable single step mode. EXCP_DEBUG is returned by the
915 CPU loop after each instruction */
916 void cpu_single_step(CPUState *cpu, int enabled)
918 if (cpu->singlestep_enabled != enabled) {
919 cpu->singlestep_enabled = enabled;
920 if (kvm_enabled()) {
921 kvm_update_guest_debug(cpu, 0);
922 } else {
923 /* must flush all the translated code to avoid inconsistencies */
924 /* XXX: only flush what is necessary */
925 tb_flush(cpu);
930 void cpu_abort(CPUState *cpu, const char *fmt, ...)
932 va_list ap;
933 va_list ap2;
935 va_start(ap, fmt);
936 va_copy(ap2, ap);
937 fprintf(stderr, "qemu: fatal: ");
938 vfprintf(stderr, fmt, ap);
939 fprintf(stderr, "\n");
940 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
941 if (qemu_log_separate()) {
942 qemu_log("qemu: fatal: ");
943 qemu_log_vprintf(fmt, ap2);
944 qemu_log("\n");
945 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
946 qemu_log_flush();
947 qemu_log_close();
949 va_end(ap2);
950 va_end(ap);
951 replay_finish();
952 #if defined(CONFIG_USER_ONLY)
954 struct sigaction act;
955 sigfillset(&act.sa_mask);
956 act.sa_handler = SIG_DFL;
957 sigaction(SIGABRT, &act, NULL);
959 #endif
960 abort();
963 #if !defined(CONFIG_USER_ONLY)
964 /* Called from RCU critical section */
965 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
967 RAMBlock *block;
969 block = atomic_rcu_read(&ram_list.mru_block);
970 if (block && addr - block->offset < block->max_length) {
971 return block;
973 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
974 if (addr - block->offset < block->max_length) {
975 goto found;
979 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
980 abort();
982 found:
983 /* It is safe to write mru_block outside the iothread lock. This
984 * is what happens:
986 * mru_block = xxx
987 * rcu_read_unlock()
988 * xxx removed from list
989 * rcu_read_lock()
990 * read mru_block
991 * mru_block = NULL;
992 * call_rcu(reclaim_ramblock, xxx);
993 * rcu_read_unlock()
995 * atomic_rcu_set is not needed here. The block was already published
996 * when it was placed into the list. Here we're just making an extra
997 * copy of the pointer.
999 ram_list.mru_block = block;
1000 return block;
1003 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
1005 CPUState *cpu;
1006 ram_addr_t start1;
1007 RAMBlock *block;
1008 ram_addr_t end;
1010 end = TARGET_PAGE_ALIGN(start + length);
1011 start &= TARGET_PAGE_MASK;
1013 rcu_read_lock();
1014 block = qemu_get_ram_block(start);
1015 assert(block == qemu_get_ram_block(end - 1));
1016 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
1017 CPU_FOREACH(cpu) {
1018 tlb_reset_dirty(cpu, start1, length);
1020 rcu_read_unlock();
1023 /* Note: start and end must be within the same ram block. */
1024 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1025 ram_addr_t length,
1026 unsigned client)
1028 DirtyMemoryBlocks *blocks;
1029 unsigned long end, page;
1030 bool dirty = false;
1032 if (length == 0) {
1033 return false;
1036 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1037 page = start >> TARGET_PAGE_BITS;
1039 rcu_read_lock();
1041 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1043 while (page < end) {
1044 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1045 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1046 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1048 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1049 offset, num);
1050 page += num;
1053 rcu_read_unlock();
1055 if (dirty && tcg_enabled()) {
1056 tlb_reset_dirty_range_all(start, length);
1059 return dirty;
1062 /* Called from RCU critical section */
1063 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1064 MemoryRegionSection *section,
1065 target_ulong vaddr,
1066 hwaddr paddr, hwaddr xlat,
1067 int prot,
1068 target_ulong *address)
1070 hwaddr iotlb;
1071 CPUWatchpoint *wp;
1073 if (memory_region_is_ram(section->mr)) {
1074 /* Normal RAM. */
1075 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1076 if (!section->readonly) {
1077 iotlb |= PHYS_SECTION_NOTDIRTY;
1078 } else {
1079 iotlb |= PHYS_SECTION_ROM;
1081 } else {
1082 AddressSpaceDispatch *d;
1084 d = atomic_rcu_read(&section->address_space->dispatch);
1085 iotlb = section - d->map.sections;
1086 iotlb += xlat;
1089 /* Make accesses to pages with watchpoints go via the
1090 watchpoint trap routines. */
1091 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1092 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1093 /* Avoid trapping reads of pages with a write breakpoint. */
1094 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1095 iotlb = PHYS_SECTION_WATCH + paddr;
1096 *address |= TLB_MMIO;
1097 break;
1102 return iotlb;
1104 #endif /* !defined(CONFIG_USER_ONLY) */
1106 #if !defined(CONFIG_USER_ONLY)
1108 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1109 uint16_t section);
1110 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1112 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1113 qemu_anon_ram_alloc;
1116 * Set a custom physical guest memory alloator.
1117 * Accelerators with unusual needs may need this. Hopefully, we can
1118 * get rid of it eventually.
1120 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1122 phys_mem_alloc = alloc;
1125 static uint16_t phys_section_add(PhysPageMap *map,
1126 MemoryRegionSection *section)
1128 /* The physical section number is ORed with a page-aligned
1129 * pointer to produce the iotlb entries. Thus it should
1130 * never overflow into the page-aligned value.
1132 assert(map->sections_nb < TARGET_PAGE_SIZE);
1134 if (map->sections_nb == map->sections_nb_alloc) {
1135 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1136 map->sections = g_renew(MemoryRegionSection, map->sections,
1137 map->sections_nb_alloc);
1139 map->sections[map->sections_nb] = *section;
1140 memory_region_ref(section->mr);
1141 return map->sections_nb++;
1144 static void phys_section_destroy(MemoryRegion *mr)
1146 bool have_sub_page = mr->subpage;
1148 memory_region_unref(mr);
1150 if (have_sub_page) {
1151 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1152 object_unref(OBJECT(&subpage->iomem));
1153 g_free(subpage);
1157 static void phys_sections_free(PhysPageMap *map)
1159 while (map->sections_nb > 0) {
1160 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1161 phys_section_destroy(section->mr);
1163 g_free(map->sections);
1164 g_free(map->nodes);
1167 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1169 subpage_t *subpage;
1170 hwaddr base = section->offset_within_address_space
1171 & TARGET_PAGE_MASK;
1172 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1173 d->map.nodes, d->map.sections);
1174 MemoryRegionSection subsection = {
1175 .offset_within_address_space = base,
1176 .size = int128_make64(TARGET_PAGE_SIZE),
1178 hwaddr start, end;
1180 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1182 if (!(existing->mr->subpage)) {
1183 subpage = subpage_init(d->as, base);
1184 subsection.address_space = d->as;
1185 subsection.mr = &subpage->iomem;
1186 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1187 phys_section_add(&d->map, &subsection));
1188 } else {
1189 subpage = container_of(existing->mr, subpage_t, iomem);
1191 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1192 end = start + int128_get64(section->size) - 1;
1193 subpage_register(subpage, start, end,
1194 phys_section_add(&d->map, section));
1198 static void register_multipage(AddressSpaceDispatch *d,
1199 MemoryRegionSection *section)
1201 hwaddr start_addr = section->offset_within_address_space;
1202 uint16_t section_index = phys_section_add(&d->map, section);
1203 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1204 TARGET_PAGE_BITS));
1206 assert(num_pages);
1207 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1210 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1212 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1213 AddressSpaceDispatch *d = as->next_dispatch;
1214 MemoryRegionSection now = *section, remain = *section;
1215 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1217 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1218 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1219 - now.offset_within_address_space;
1221 now.size = int128_min(int128_make64(left), now.size);
1222 register_subpage(d, &now);
1223 } else {
1224 now.size = int128_zero();
1226 while (int128_ne(remain.size, now.size)) {
1227 remain.size = int128_sub(remain.size, now.size);
1228 remain.offset_within_address_space += int128_get64(now.size);
1229 remain.offset_within_region += int128_get64(now.size);
1230 now = remain;
1231 if (int128_lt(remain.size, page_size)) {
1232 register_subpage(d, &now);
1233 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1234 now.size = page_size;
1235 register_subpage(d, &now);
1236 } else {
1237 now.size = int128_and(now.size, int128_neg(page_size));
1238 register_multipage(d, &now);
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1249 void qemu_mutex_lock_ramlist(void)
1251 qemu_mutex_lock(&ram_list.mutex);
1254 void qemu_mutex_unlock_ramlist(void)
1256 qemu_mutex_unlock(&ram_list.mutex);
1259 #ifdef __linux__
1260 static void *file_ram_alloc(RAMBlock *block,
1261 ram_addr_t memory,
1262 const char *path,
1263 Error **errp)
1265 bool unlink_on_error = false;
1266 char *filename;
1267 char *sanitized_name;
1268 char *c;
1269 void *area;
1270 int fd = -1;
1271 int64_t page_size;
1273 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1274 error_setg(errp,
1275 "host lacks kvm mmu notifiers, -mem-path unsupported");
1276 return NULL;
1279 for (;;) {
1280 fd = open(path, O_RDWR);
1281 if (fd >= 0) {
1282 /* @path names an existing file, use it */
1283 break;
1285 if (errno == ENOENT) {
1286 /* @path names a file that doesn't exist, create it */
1287 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1288 if (fd >= 0) {
1289 unlink_on_error = true;
1290 break;
1292 } else if (errno == EISDIR) {
1293 /* @path names a directory, create a file there */
1294 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1295 sanitized_name = g_strdup(memory_region_name(block->mr));
1296 for (c = sanitized_name; *c != '\0'; c++) {
1297 if (*c == '/') {
1298 *c = '_';
1302 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1303 sanitized_name);
1304 g_free(sanitized_name);
1306 fd = mkstemp(filename);
1307 if (fd >= 0) {
1308 unlink(filename);
1309 g_free(filename);
1310 break;
1312 g_free(filename);
1314 if (errno != EEXIST && errno != EINTR) {
1315 error_setg_errno(errp, errno,
1316 "can't open backing store %s for guest RAM",
1317 path);
1318 goto error;
1321 * Try again on EINTR and EEXIST. The latter happens when
1322 * something else creates the file between our two open().
1326 page_size = qemu_fd_getpagesize(fd);
1327 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1329 if (memory < page_size) {
1330 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1331 "or larger than page size 0x%" PRIx64,
1332 memory, page_size);
1333 goto error;
1336 memory = ROUND_UP(memory, page_size);
1339 * ftruncate is not supported by hugetlbfs in older
1340 * hosts, so don't bother bailing out on errors.
1341 * If anything goes wrong with it under other filesystems,
1342 * mmap will fail.
1344 if (ftruncate(fd, memory)) {
1345 perror("ftruncate");
1348 area = qemu_ram_mmap(fd, memory, block->mr->align,
1349 block->flags & RAM_SHARED);
1350 if (area == MAP_FAILED) {
1351 error_setg_errno(errp, errno,
1352 "unable to map backing store for guest RAM");
1353 goto error;
1356 if (mem_prealloc) {
1357 os_mem_prealloc(fd, area, memory);
1360 block->fd = fd;
1361 return area;
1363 error:
1364 if (unlink_on_error) {
1365 unlink(path);
1367 if (fd != -1) {
1368 close(fd);
1370 return NULL;
1372 #endif
1374 /* Called with the ramlist lock held. */
1375 static ram_addr_t find_ram_offset(ram_addr_t size)
1377 RAMBlock *block, *next_block;
1378 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1380 assert(size != 0); /* it would hand out same offset multiple times */
1382 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1383 return 0;
1386 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1387 ram_addr_t end, next = RAM_ADDR_MAX;
1389 end = block->offset + block->max_length;
1391 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1392 if (next_block->offset >= end) {
1393 next = MIN(next, next_block->offset);
1396 if (next - end >= size && next - end < mingap) {
1397 offset = end;
1398 mingap = next - end;
1402 if (offset == RAM_ADDR_MAX) {
1403 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1404 (uint64_t)size);
1405 abort();
1408 return offset;
1411 ram_addr_t last_ram_offset(void)
1413 RAMBlock *block;
1414 ram_addr_t last = 0;
1416 rcu_read_lock();
1417 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1418 last = MAX(last, block->offset + block->max_length);
1420 rcu_read_unlock();
1421 return last;
1424 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1426 int ret;
1428 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1429 if (!machine_dump_guest_core(current_machine)) {
1430 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1431 if (ret) {
1432 perror("qemu_madvise");
1433 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1434 "but dump_guest_core=off specified\n");
1439 const char *qemu_ram_get_idstr(RAMBlock *rb)
1441 return rb->idstr;
1444 /* Called with iothread lock held. */
1445 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1447 RAMBlock *block;
1449 assert(new_block);
1450 assert(!new_block->idstr[0]);
1452 if (dev) {
1453 char *id = qdev_get_dev_path(dev);
1454 if (id) {
1455 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1456 g_free(id);
1459 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1461 rcu_read_lock();
1462 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1463 if (block != new_block &&
1464 !strcmp(block->idstr, new_block->idstr)) {
1465 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1466 new_block->idstr);
1467 abort();
1470 rcu_read_unlock();
1473 /* Called with iothread lock held. */
1474 void qemu_ram_unset_idstr(RAMBlock *block)
1476 /* FIXME: arch_init.c assumes that this is not called throughout
1477 * migration. Ignore the problem since hot-unplug during migration
1478 * does not work anyway.
1480 if (block) {
1481 memset(block->idstr, 0, sizeof(block->idstr));
1485 static int memory_try_enable_merging(void *addr, size_t len)
1487 if (!machine_mem_merge(current_machine)) {
1488 /* disabled by the user */
1489 return 0;
1492 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1495 /* Only legal before guest might have detected the memory size: e.g. on
1496 * incoming migration, or right after reset.
1498 * As memory core doesn't know how is memory accessed, it is up to
1499 * resize callback to update device state and/or add assertions to detect
1500 * misuse, if necessary.
1502 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1504 assert(block);
1506 newsize = HOST_PAGE_ALIGN(newsize);
1508 if (block->used_length == newsize) {
1509 return 0;
1512 if (!(block->flags & RAM_RESIZEABLE)) {
1513 error_setg_errno(errp, EINVAL,
1514 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1515 " in != 0x" RAM_ADDR_FMT, block->idstr,
1516 newsize, block->used_length);
1517 return -EINVAL;
1520 if (block->max_length < newsize) {
1521 error_setg_errno(errp, EINVAL,
1522 "Length too large: %s: 0x" RAM_ADDR_FMT
1523 " > 0x" RAM_ADDR_FMT, block->idstr,
1524 newsize, block->max_length);
1525 return -EINVAL;
1528 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1529 block->used_length = newsize;
1530 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1531 DIRTY_CLIENTS_ALL);
1532 memory_region_set_size(block->mr, newsize);
1533 if (block->resized) {
1534 block->resized(block->idstr, newsize, block->host);
1536 return 0;
1539 /* Called with ram_list.mutex held */
1540 static void dirty_memory_extend(ram_addr_t old_ram_size,
1541 ram_addr_t new_ram_size)
1543 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1544 DIRTY_MEMORY_BLOCK_SIZE);
1545 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1546 DIRTY_MEMORY_BLOCK_SIZE);
1547 int i;
1549 /* Only need to extend if block count increased */
1550 if (new_num_blocks <= old_num_blocks) {
1551 return;
1554 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1555 DirtyMemoryBlocks *old_blocks;
1556 DirtyMemoryBlocks *new_blocks;
1557 int j;
1559 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1560 new_blocks = g_malloc(sizeof(*new_blocks) +
1561 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1563 if (old_num_blocks) {
1564 memcpy(new_blocks->blocks, old_blocks->blocks,
1565 old_num_blocks * sizeof(old_blocks->blocks[0]));
1568 for (j = old_num_blocks; j < new_num_blocks; j++) {
1569 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1572 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1574 if (old_blocks) {
1575 g_free_rcu(old_blocks, rcu);
1580 static void ram_block_add(RAMBlock *new_block, Error **errp)
1582 RAMBlock *block;
1583 RAMBlock *last_block = NULL;
1584 ram_addr_t old_ram_size, new_ram_size;
1585 Error *err = NULL;
1587 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1589 qemu_mutex_lock_ramlist();
1590 new_block->offset = find_ram_offset(new_block->max_length);
1592 if (!new_block->host) {
1593 if (xen_enabled()) {
1594 xen_ram_alloc(new_block->offset, new_block->max_length,
1595 new_block->mr, &err);
1596 if (err) {
1597 error_propagate(errp, err);
1598 qemu_mutex_unlock_ramlist();
1599 return;
1601 } else {
1602 new_block->host = phys_mem_alloc(new_block->max_length,
1603 &new_block->mr->align);
1604 if (!new_block->host) {
1605 error_setg_errno(errp, errno,
1606 "cannot set up guest memory '%s'",
1607 memory_region_name(new_block->mr));
1608 qemu_mutex_unlock_ramlist();
1609 return;
1611 memory_try_enable_merging(new_block->host, new_block->max_length);
1615 new_ram_size = MAX(old_ram_size,
1616 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1617 if (new_ram_size > old_ram_size) {
1618 migration_bitmap_extend(old_ram_size, new_ram_size);
1619 dirty_memory_extend(old_ram_size, new_ram_size);
1621 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1622 * QLIST (which has an RCU-friendly variant) does not have insertion at
1623 * tail, so save the last element in last_block.
1625 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1626 last_block = block;
1627 if (block->max_length < new_block->max_length) {
1628 break;
1631 if (block) {
1632 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1633 } else if (last_block) {
1634 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1635 } else { /* list is empty */
1636 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1638 ram_list.mru_block = NULL;
1640 /* Write list before version */
1641 smp_wmb();
1642 ram_list.version++;
1643 qemu_mutex_unlock_ramlist();
1645 cpu_physical_memory_set_dirty_range(new_block->offset,
1646 new_block->used_length,
1647 DIRTY_CLIENTS_ALL);
1649 if (new_block->host) {
1650 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1651 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1652 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1653 if (kvm_enabled()) {
1654 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1659 #ifdef __linux__
1660 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1661 bool share, const char *mem_path,
1662 Error **errp)
1664 RAMBlock *new_block;
1665 Error *local_err = NULL;
1667 if (xen_enabled()) {
1668 error_setg(errp, "-mem-path not supported with Xen");
1669 return NULL;
1672 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1674 * file_ram_alloc() needs to allocate just like
1675 * phys_mem_alloc, but we haven't bothered to provide
1676 * a hook there.
1678 error_setg(errp,
1679 "-mem-path not supported with this accelerator");
1680 return NULL;
1683 size = HOST_PAGE_ALIGN(size);
1684 new_block = g_malloc0(sizeof(*new_block));
1685 new_block->mr = mr;
1686 new_block->used_length = size;
1687 new_block->max_length = size;
1688 new_block->flags = share ? RAM_SHARED : 0;
1689 new_block->host = file_ram_alloc(new_block, size,
1690 mem_path, errp);
1691 if (!new_block->host) {
1692 g_free(new_block);
1693 return NULL;
1696 ram_block_add(new_block, &local_err);
1697 if (local_err) {
1698 g_free(new_block);
1699 error_propagate(errp, local_err);
1700 return NULL;
1702 return new_block;
1704 #endif
1706 static
1707 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1708 void (*resized)(const char*,
1709 uint64_t length,
1710 void *host),
1711 void *host, bool resizeable,
1712 MemoryRegion *mr, Error **errp)
1714 RAMBlock *new_block;
1715 Error *local_err = NULL;
1717 size = HOST_PAGE_ALIGN(size);
1718 max_size = HOST_PAGE_ALIGN(max_size);
1719 new_block = g_malloc0(sizeof(*new_block));
1720 new_block->mr = mr;
1721 new_block->resized = resized;
1722 new_block->used_length = size;
1723 new_block->max_length = max_size;
1724 assert(max_size >= size);
1725 new_block->fd = -1;
1726 new_block->host = host;
1727 if (host) {
1728 new_block->flags |= RAM_PREALLOC;
1730 if (resizeable) {
1731 new_block->flags |= RAM_RESIZEABLE;
1733 ram_block_add(new_block, &local_err);
1734 if (local_err) {
1735 g_free(new_block);
1736 error_propagate(errp, local_err);
1737 return NULL;
1739 return new_block;
1742 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1743 MemoryRegion *mr, Error **errp)
1745 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1748 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1750 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1753 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1754 void (*resized)(const char*,
1755 uint64_t length,
1756 void *host),
1757 MemoryRegion *mr, Error **errp)
1759 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1762 static void reclaim_ramblock(RAMBlock *block)
1764 if (block->flags & RAM_PREALLOC) {
1766 } else if (xen_enabled()) {
1767 xen_invalidate_map_cache_entry(block->host);
1768 #ifndef _WIN32
1769 } else if (block->fd >= 0) {
1770 qemu_ram_munmap(block->host, block->max_length);
1771 close(block->fd);
1772 #endif
1773 } else {
1774 qemu_anon_ram_free(block->host, block->max_length);
1776 g_free(block);
1779 void qemu_ram_free(RAMBlock *block)
1781 if (!block) {
1782 return;
1785 qemu_mutex_lock_ramlist();
1786 QLIST_REMOVE_RCU(block, next);
1787 ram_list.mru_block = NULL;
1788 /* Write list before version */
1789 smp_wmb();
1790 ram_list.version++;
1791 call_rcu(block, reclaim_ramblock, rcu);
1792 qemu_mutex_unlock_ramlist();
1795 #ifndef _WIN32
1796 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1798 RAMBlock *block;
1799 ram_addr_t offset;
1800 int flags;
1801 void *area, *vaddr;
1803 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1804 offset = addr - block->offset;
1805 if (offset < block->max_length) {
1806 vaddr = ramblock_ptr(block, offset);
1807 if (block->flags & RAM_PREALLOC) {
1809 } else if (xen_enabled()) {
1810 abort();
1811 } else {
1812 flags = MAP_FIXED;
1813 if (block->fd >= 0) {
1814 flags |= (block->flags & RAM_SHARED ?
1815 MAP_SHARED : MAP_PRIVATE);
1816 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1817 flags, block->fd, offset);
1818 } else {
1820 * Remap needs to match alloc. Accelerators that
1821 * set phys_mem_alloc never remap. If they did,
1822 * we'd need a remap hook here.
1824 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1826 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1827 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1828 flags, -1, 0);
1830 if (area != vaddr) {
1831 fprintf(stderr, "Could not remap addr: "
1832 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1833 length, addr);
1834 exit(1);
1836 memory_try_enable_merging(vaddr, length);
1837 qemu_ram_setup_dump(vaddr, length);
1842 #endif /* !_WIN32 */
1844 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1845 * This should not be used for general purpose DMA. Use address_space_map
1846 * or address_space_rw instead. For local memory (e.g. video ram) that the
1847 * device owns, use memory_region_get_ram_ptr.
1849 * Called within RCU critical section.
1851 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1853 RAMBlock *block = ram_block;
1855 if (block == NULL) {
1856 block = qemu_get_ram_block(addr);
1857 addr -= block->offset;
1860 if (xen_enabled() && block->host == NULL) {
1861 /* We need to check if the requested address is in the RAM
1862 * because we don't want to map the entire memory in QEMU.
1863 * In that case just map until the end of the page.
1865 if (block->offset == 0) {
1866 return xen_map_cache(addr, 0, 0);
1869 block->host = xen_map_cache(block->offset, block->max_length, 1);
1871 return ramblock_ptr(block, addr);
1874 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1875 * but takes a size argument.
1877 * Called within RCU critical section.
1879 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1880 hwaddr *size)
1882 RAMBlock *block = ram_block;
1883 if (*size == 0) {
1884 return NULL;
1887 if (block == NULL) {
1888 block = qemu_get_ram_block(addr);
1889 addr -= block->offset;
1891 *size = MIN(*size, block->max_length - addr);
1893 if (xen_enabled() && block->host == NULL) {
1894 /* We need to check if the requested address is in the RAM
1895 * because we don't want to map the entire memory in QEMU.
1896 * In that case just map the requested area.
1898 if (block->offset == 0) {
1899 return xen_map_cache(addr, *size, 1);
1902 block->host = xen_map_cache(block->offset, block->max_length, 1);
1905 return ramblock_ptr(block, addr);
1909 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1910 * in that RAMBlock.
1912 * ptr: Host pointer to look up
1913 * round_offset: If true round the result offset down to a page boundary
1914 * *ram_addr: set to result ram_addr
1915 * *offset: set to result offset within the RAMBlock
1917 * Returns: RAMBlock (or NULL if not found)
1919 * By the time this function returns, the returned pointer is not protected
1920 * by RCU anymore. If the caller is not within an RCU critical section and
1921 * does not hold the iothread lock, it must have other means of protecting the
1922 * pointer, such as a reference to the region that includes the incoming
1923 * ram_addr_t.
1925 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1926 ram_addr_t *offset)
1928 RAMBlock *block;
1929 uint8_t *host = ptr;
1931 if (xen_enabled()) {
1932 ram_addr_t ram_addr;
1933 rcu_read_lock();
1934 ram_addr = xen_ram_addr_from_mapcache(ptr);
1935 block = qemu_get_ram_block(ram_addr);
1936 if (block) {
1937 *offset = ram_addr - block->offset;
1939 rcu_read_unlock();
1940 return block;
1943 rcu_read_lock();
1944 block = atomic_rcu_read(&ram_list.mru_block);
1945 if (block && block->host && host - block->host < block->max_length) {
1946 goto found;
1949 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1950 /* This case append when the block is not mapped. */
1951 if (block->host == NULL) {
1952 continue;
1954 if (host - block->host < block->max_length) {
1955 goto found;
1959 rcu_read_unlock();
1960 return NULL;
1962 found:
1963 *offset = (host - block->host);
1964 if (round_offset) {
1965 *offset &= TARGET_PAGE_MASK;
1967 rcu_read_unlock();
1968 return block;
1972 * Finds the named RAMBlock
1974 * name: The name of RAMBlock to find
1976 * Returns: RAMBlock (or NULL if not found)
1978 RAMBlock *qemu_ram_block_by_name(const char *name)
1980 RAMBlock *block;
1982 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1983 if (!strcmp(name, block->idstr)) {
1984 return block;
1988 return NULL;
1991 /* Some of the softmmu routines need to translate from a host pointer
1992 (typically a TLB entry) back to a ram offset. */
1993 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1995 RAMBlock *block;
1996 ram_addr_t offset;
1998 block = qemu_ram_block_from_host(ptr, false, &offset);
1999 if (!block) {
2000 return RAM_ADDR_INVALID;
2003 return block->offset + offset;
2006 /* Called within RCU critical section. */
2007 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2008 uint64_t val, unsigned size)
2010 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2011 tb_invalidate_phys_page_fast(ram_addr, size);
2013 switch (size) {
2014 case 1:
2015 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2016 break;
2017 case 2:
2018 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2019 break;
2020 case 4:
2021 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2022 break;
2023 default:
2024 abort();
2026 /* Set both VGA and migration bits for simplicity and to remove
2027 * the notdirty callback faster.
2029 cpu_physical_memory_set_dirty_range(ram_addr, size,
2030 DIRTY_CLIENTS_NOCODE);
2031 /* we remove the notdirty callback only if the code has been
2032 flushed */
2033 if (!cpu_physical_memory_is_clean(ram_addr)) {
2034 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2038 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2039 unsigned size, bool is_write)
2041 return is_write;
2044 static const MemoryRegionOps notdirty_mem_ops = {
2045 .write = notdirty_mem_write,
2046 .valid.accepts = notdirty_mem_accepts,
2047 .endianness = DEVICE_NATIVE_ENDIAN,
2050 /* Generate a debug exception if a watchpoint has been hit. */
2051 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2053 CPUState *cpu = current_cpu;
2054 CPUClass *cc = CPU_GET_CLASS(cpu);
2055 CPUArchState *env = cpu->env_ptr;
2056 target_ulong pc, cs_base;
2057 target_ulong vaddr;
2058 CPUWatchpoint *wp;
2059 uint32_t cpu_flags;
2061 if (cpu->watchpoint_hit) {
2062 /* We re-entered the check after replacing the TB. Now raise
2063 * the debug interrupt so that is will trigger after the
2064 * current instruction. */
2065 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2066 return;
2068 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2069 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2070 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2071 && (wp->flags & flags)) {
2072 if (flags == BP_MEM_READ) {
2073 wp->flags |= BP_WATCHPOINT_HIT_READ;
2074 } else {
2075 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2077 wp->hitaddr = vaddr;
2078 wp->hitattrs = attrs;
2079 if (!cpu->watchpoint_hit) {
2080 if (wp->flags & BP_CPU &&
2081 !cc->debug_check_watchpoint(cpu, wp)) {
2082 wp->flags &= ~BP_WATCHPOINT_HIT;
2083 continue;
2085 cpu->watchpoint_hit = wp;
2086 tb_check_watchpoint(cpu);
2087 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2088 cpu->exception_index = EXCP_DEBUG;
2089 cpu_loop_exit(cpu);
2090 } else {
2091 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2092 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2093 cpu_loop_exit_noexc(cpu);
2096 } else {
2097 wp->flags &= ~BP_WATCHPOINT_HIT;
2102 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2103 so these check for a hit then pass through to the normal out-of-line
2104 phys routines. */
2105 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2106 unsigned size, MemTxAttrs attrs)
2108 MemTxResult res;
2109 uint64_t data;
2110 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2111 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2113 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2114 switch (size) {
2115 case 1:
2116 data = address_space_ldub(as, addr, attrs, &res);
2117 break;
2118 case 2:
2119 data = address_space_lduw(as, addr, attrs, &res);
2120 break;
2121 case 4:
2122 data = address_space_ldl(as, addr, attrs, &res);
2123 break;
2124 default: abort();
2126 *pdata = data;
2127 return res;
2130 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2131 uint64_t val, unsigned size,
2132 MemTxAttrs attrs)
2134 MemTxResult res;
2135 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2136 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2138 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2139 switch (size) {
2140 case 1:
2141 address_space_stb(as, addr, val, attrs, &res);
2142 break;
2143 case 2:
2144 address_space_stw(as, addr, val, attrs, &res);
2145 break;
2146 case 4:
2147 address_space_stl(as, addr, val, attrs, &res);
2148 break;
2149 default: abort();
2151 return res;
2154 static const MemoryRegionOps watch_mem_ops = {
2155 .read_with_attrs = watch_mem_read,
2156 .write_with_attrs = watch_mem_write,
2157 .endianness = DEVICE_NATIVE_ENDIAN,
2160 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2161 unsigned len, MemTxAttrs attrs)
2163 subpage_t *subpage = opaque;
2164 uint8_t buf[8];
2165 MemTxResult res;
2167 #if defined(DEBUG_SUBPAGE)
2168 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2169 subpage, len, addr);
2170 #endif
2171 res = address_space_read(subpage->as, addr + subpage->base,
2172 attrs, buf, len);
2173 if (res) {
2174 return res;
2176 switch (len) {
2177 case 1:
2178 *data = ldub_p(buf);
2179 return MEMTX_OK;
2180 case 2:
2181 *data = lduw_p(buf);
2182 return MEMTX_OK;
2183 case 4:
2184 *data = ldl_p(buf);
2185 return MEMTX_OK;
2186 case 8:
2187 *data = ldq_p(buf);
2188 return MEMTX_OK;
2189 default:
2190 abort();
2194 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2195 uint64_t value, unsigned len, MemTxAttrs attrs)
2197 subpage_t *subpage = opaque;
2198 uint8_t buf[8];
2200 #if defined(DEBUG_SUBPAGE)
2201 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2202 " value %"PRIx64"\n",
2203 __func__, subpage, len, addr, value);
2204 #endif
2205 switch (len) {
2206 case 1:
2207 stb_p(buf, value);
2208 break;
2209 case 2:
2210 stw_p(buf, value);
2211 break;
2212 case 4:
2213 stl_p(buf, value);
2214 break;
2215 case 8:
2216 stq_p(buf, value);
2217 break;
2218 default:
2219 abort();
2221 return address_space_write(subpage->as, addr + subpage->base,
2222 attrs, buf, len);
2225 static bool subpage_accepts(void *opaque, hwaddr addr,
2226 unsigned len, bool is_write)
2228 subpage_t *subpage = opaque;
2229 #if defined(DEBUG_SUBPAGE)
2230 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2231 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2232 #endif
2234 return address_space_access_valid(subpage->as, addr + subpage->base,
2235 len, is_write);
2238 static const MemoryRegionOps subpage_ops = {
2239 .read_with_attrs = subpage_read,
2240 .write_with_attrs = subpage_write,
2241 .impl.min_access_size = 1,
2242 .impl.max_access_size = 8,
2243 .valid.min_access_size = 1,
2244 .valid.max_access_size = 8,
2245 .valid.accepts = subpage_accepts,
2246 .endianness = DEVICE_NATIVE_ENDIAN,
2249 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2250 uint16_t section)
2252 int idx, eidx;
2254 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2255 return -1;
2256 idx = SUBPAGE_IDX(start);
2257 eidx = SUBPAGE_IDX(end);
2258 #if defined(DEBUG_SUBPAGE)
2259 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2260 __func__, mmio, start, end, idx, eidx, section);
2261 #endif
2262 for (; idx <= eidx; idx++) {
2263 mmio->sub_section[idx] = section;
2266 return 0;
2269 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2271 subpage_t *mmio;
2273 mmio = g_malloc0(sizeof(subpage_t));
2275 mmio->as = as;
2276 mmio->base = base;
2277 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2278 NULL, TARGET_PAGE_SIZE);
2279 mmio->iomem.subpage = true;
2280 #if defined(DEBUG_SUBPAGE)
2281 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2282 mmio, base, TARGET_PAGE_SIZE);
2283 #endif
2284 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2286 return mmio;
2289 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2290 MemoryRegion *mr)
2292 assert(as);
2293 MemoryRegionSection section = {
2294 .address_space = as,
2295 .mr = mr,
2296 .offset_within_address_space = 0,
2297 .offset_within_region = 0,
2298 .size = int128_2_64(),
2301 return phys_section_add(map, &section);
2304 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2306 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2307 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2308 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2309 MemoryRegionSection *sections = d->map.sections;
2311 return sections[index & ~TARGET_PAGE_MASK].mr;
2314 static void io_mem_init(void)
2316 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2317 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2318 NULL, UINT64_MAX);
2319 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2320 NULL, UINT64_MAX);
2321 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2322 NULL, UINT64_MAX);
2325 static void mem_begin(MemoryListener *listener)
2327 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2328 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2329 uint16_t n;
2331 n = dummy_section(&d->map, as, &io_mem_unassigned);
2332 assert(n == PHYS_SECTION_UNASSIGNED);
2333 n = dummy_section(&d->map, as, &io_mem_notdirty);
2334 assert(n == PHYS_SECTION_NOTDIRTY);
2335 n = dummy_section(&d->map, as, &io_mem_rom);
2336 assert(n == PHYS_SECTION_ROM);
2337 n = dummy_section(&d->map, as, &io_mem_watch);
2338 assert(n == PHYS_SECTION_WATCH);
2340 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2341 d->as = as;
2342 as->next_dispatch = d;
2345 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2347 phys_sections_free(&d->map);
2348 g_free(d);
2351 static void mem_commit(MemoryListener *listener)
2353 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2354 AddressSpaceDispatch *cur = as->dispatch;
2355 AddressSpaceDispatch *next = as->next_dispatch;
2357 phys_page_compact_all(next, next->map.nodes_nb);
2359 atomic_rcu_set(&as->dispatch, next);
2360 if (cur) {
2361 call_rcu(cur, address_space_dispatch_free, rcu);
2365 static void tcg_commit(MemoryListener *listener)
2367 CPUAddressSpace *cpuas;
2368 AddressSpaceDispatch *d;
2370 /* since each CPU stores ram addresses in its TLB cache, we must
2371 reset the modified entries */
2372 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2373 cpu_reloading_memory_map();
2374 /* The CPU and TLB are protected by the iothread lock.
2375 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2376 * may have split the RCU critical section.
2378 d = atomic_rcu_read(&cpuas->as->dispatch);
2379 cpuas->memory_dispatch = d;
2380 tlb_flush(cpuas->cpu, 1);
2383 void address_space_init_dispatch(AddressSpace *as)
2385 as->dispatch = NULL;
2386 as->dispatch_listener = (MemoryListener) {
2387 .begin = mem_begin,
2388 .commit = mem_commit,
2389 .region_add = mem_add,
2390 .region_nop = mem_add,
2391 .priority = 0,
2393 memory_listener_register(&as->dispatch_listener, as);
2396 void address_space_unregister(AddressSpace *as)
2398 memory_listener_unregister(&as->dispatch_listener);
2401 void address_space_destroy_dispatch(AddressSpace *as)
2403 AddressSpaceDispatch *d = as->dispatch;
2405 atomic_rcu_set(&as->dispatch, NULL);
2406 if (d) {
2407 call_rcu(d, address_space_dispatch_free, rcu);
2411 static void memory_map_init(void)
2413 system_memory = g_malloc(sizeof(*system_memory));
2415 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2416 address_space_init(&address_space_memory, system_memory, "memory");
2418 system_io = g_malloc(sizeof(*system_io));
2419 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2420 65536);
2421 address_space_init(&address_space_io, system_io, "I/O");
2424 MemoryRegion *get_system_memory(void)
2426 return system_memory;
2429 MemoryRegion *get_system_io(void)
2431 return system_io;
2434 #endif /* !defined(CONFIG_USER_ONLY) */
2436 /* physical memory access (slow version, mainly for debug) */
2437 #if defined(CONFIG_USER_ONLY)
2438 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2439 uint8_t *buf, int len, int is_write)
2441 int l, flags;
2442 target_ulong page;
2443 void * p;
2445 while (len > 0) {
2446 page = addr & TARGET_PAGE_MASK;
2447 l = (page + TARGET_PAGE_SIZE) - addr;
2448 if (l > len)
2449 l = len;
2450 flags = page_get_flags(page);
2451 if (!(flags & PAGE_VALID))
2452 return -1;
2453 if (is_write) {
2454 if (!(flags & PAGE_WRITE))
2455 return -1;
2456 /* XXX: this code should not depend on lock_user */
2457 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2458 return -1;
2459 memcpy(p, buf, l);
2460 unlock_user(p, addr, l);
2461 } else {
2462 if (!(flags & PAGE_READ))
2463 return -1;
2464 /* XXX: this code should not depend on lock_user */
2465 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2466 return -1;
2467 memcpy(buf, p, l);
2468 unlock_user(p, addr, 0);
2470 len -= l;
2471 buf += l;
2472 addr += l;
2474 return 0;
2477 #else
2479 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2480 hwaddr length)
2482 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2483 addr += memory_region_get_ram_addr(mr);
2485 /* No early return if dirty_log_mask is or becomes 0, because
2486 * cpu_physical_memory_set_dirty_range will still call
2487 * xen_modified_memory.
2489 if (dirty_log_mask) {
2490 dirty_log_mask =
2491 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2493 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2494 tb_invalidate_phys_range(addr, addr + length);
2495 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2497 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2500 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2502 unsigned access_size_max = mr->ops->valid.max_access_size;
2504 /* Regions are assumed to support 1-4 byte accesses unless
2505 otherwise specified. */
2506 if (access_size_max == 0) {
2507 access_size_max = 4;
2510 /* Bound the maximum access by the alignment of the address. */
2511 if (!mr->ops->impl.unaligned) {
2512 unsigned align_size_max = addr & -addr;
2513 if (align_size_max != 0 && align_size_max < access_size_max) {
2514 access_size_max = align_size_max;
2518 /* Don't attempt accesses larger than the maximum. */
2519 if (l > access_size_max) {
2520 l = access_size_max;
2522 l = pow2floor(l);
2524 return l;
2527 static bool prepare_mmio_access(MemoryRegion *mr)
2529 bool unlocked = !qemu_mutex_iothread_locked();
2530 bool release_lock = false;
2532 if (unlocked && mr->global_locking) {
2533 qemu_mutex_lock_iothread();
2534 unlocked = false;
2535 release_lock = true;
2537 if (mr->flush_coalesced_mmio) {
2538 if (unlocked) {
2539 qemu_mutex_lock_iothread();
2541 qemu_flush_coalesced_mmio_buffer();
2542 if (unlocked) {
2543 qemu_mutex_unlock_iothread();
2547 return release_lock;
2550 /* Called within RCU critical section. */
2551 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2552 MemTxAttrs attrs,
2553 const uint8_t *buf,
2554 int len, hwaddr addr1,
2555 hwaddr l, MemoryRegion *mr)
2557 uint8_t *ptr;
2558 uint64_t val;
2559 MemTxResult result = MEMTX_OK;
2560 bool release_lock = false;
2562 for (;;) {
2563 if (!memory_access_is_direct(mr, true)) {
2564 release_lock |= prepare_mmio_access(mr);
2565 l = memory_access_size(mr, l, addr1);
2566 /* XXX: could force current_cpu to NULL to avoid
2567 potential bugs */
2568 switch (l) {
2569 case 8:
2570 /* 64 bit write access */
2571 val = ldq_p(buf);
2572 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2573 attrs);
2574 break;
2575 case 4:
2576 /* 32 bit write access */
2577 val = ldl_p(buf);
2578 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2579 attrs);
2580 break;
2581 case 2:
2582 /* 16 bit write access */
2583 val = lduw_p(buf);
2584 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2585 attrs);
2586 break;
2587 case 1:
2588 /* 8 bit write access */
2589 val = ldub_p(buf);
2590 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2591 attrs);
2592 break;
2593 default:
2594 abort();
2596 } else {
2597 /* RAM case */
2598 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2599 memcpy(ptr, buf, l);
2600 invalidate_and_set_dirty(mr, addr1, l);
2603 if (release_lock) {
2604 qemu_mutex_unlock_iothread();
2605 release_lock = false;
2608 len -= l;
2609 buf += l;
2610 addr += l;
2612 if (!len) {
2613 break;
2616 l = len;
2617 mr = address_space_translate(as, addr, &addr1, &l, true);
2620 return result;
2623 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2624 const uint8_t *buf, int len)
2626 hwaddr l;
2627 hwaddr addr1;
2628 MemoryRegion *mr;
2629 MemTxResult result = MEMTX_OK;
2631 if (len > 0) {
2632 rcu_read_lock();
2633 l = len;
2634 mr = address_space_translate(as, addr, &addr1, &l, true);
2635 result = address_space_write_continue(as, addr, attrs, buf, len,
2636 addr1, l, mr);
2637 rcu_read_unlock();
2640 return result;
2643 /* Called within RCU critical section. */
2644 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2645 MemTxAttrs attrs, uint8_t *buf,
2646 int len, hwaddr addr1, hwaddr l,
2647 MemoryRegion *mr)
2649 uint8_t *ptr;
2650 uint64_t val;
2651 MemTxResult result = MEMTX_OK;
2652 bool release_lock = false;
2654 for (;;) {
2655 if (!memory_access_is_direct(mr, false)) {
2656 /* I/O case */
2657 release_lock |= prepare_mmio_access(mr);
2658 l = memory_access_size(mr, l, addr1);
2659 switch (l) {
2660 case 8:
2661 /* 64 bit read access */
2662 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2663 attrs);
2664 stq_p(buf, val);
2665 break;
2666 case 4:
2667 /* 32 bit read access */
2668 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2669 attrs);
2670 stl_p(buf, val);
2671 break;
2672 case 2:
2673 /* 16 bit read access */
2674 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2675 attrs);
2676 stw_p(buf, val);
2677 break;
2678 case 1:
2679 /* 8 bit read access */
2680 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2681 attrs);
2682 stb_p(buf, val);
2683 break;
2684 default:
2685 abort();
2687 } else {
2688 /* RAM case */
2689 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2690 memcpy(buf, ptr, l);
2693 if (release_lock) {
2694 qemu_mutex_unlock_iothread();
2695 release_lock = false;
2698 len -= l;
2699 buf += l;
2700 addr += l;
2702 if (!len) {
2703 break;
2706 l = len;
2707 mr = address_space_translate(as, addr, &addr1, &l, false);
2710 return result;
2713 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2714 MemTxAttrs attrs, uint8_t *buf, int len)
2716 hwaddr l;
2717 hwaddr addr1;
2718 MemoryRegion *mr;
2719 MemTxResult result = MEMTX_OK;
2721 if (len > 0) {
2722 rcu_read_lock();
2723 l = len;
2724 mr = address_space_translate(as, addr, &addr1, &l, false);
2725 result = address_space_read_continue(as, addr, attrs, buf, len,
2726 addr1, l, mr);
2727 rcu_read_unlock();
2730 return result;
2733 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2734 uint8_t *buf, int len, bool is_write)
2736 if (is_write) {
2737 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2738 } else {
2739 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2743 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2744 int len, int is_write)
2746 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2747 buf, len, is_write);
2750 enum write_rom_type {
2751 WRITE_DATA,
2752 FLUSH_CACHE,
2755 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2756 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2758 hwaddr l;
2759 uint8_t *ptr;
2760 hwaddr addr1;
2761 MemoryRegion *mr;
2763 rcu_read_lock();
2764 while (len > 0) {
2765 l = len;
2766 mr = address_space_translate(as, addr, &addr1, &l, true);
2768 if (!(memory_region_is_ram(mr) ||
2769 memory_region_is_romd(mr))) {
2770 l = memory_access_size(mr, l, addr1);
2771 } else {
2772 /* ROM/RAM case */
2773 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2774 switch (type) {
2775 case WRITE_DATA:
2776 memcpy(ptr, buf, l);
2777 invalidate_and_set_dirty(mr, addr1, l);
2778 break;
2779 case FLUSH_CACHE:
2780 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2781 break;
2784 len -= l;
2785 buf += l;
2786 addr += l;
2788 rcu_read_unlock();
2791 /* used for ROM loading : can write in RAM and ROM */
2792 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2793 const uint8_t *buf, int len)
2795 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2798 void cpu_flush_icache_range(hwaddr start, int len)
2801 * This function should do the same thing as an icache flush that was
2802 * triggered from within the guest. For TCG we are always cache coherent,
2803 * so there is no need to flush anything. For KVM / Xen we need to flush
2804 * the host's instruction cache at least.
2806 if (tcg_enabled()) {
2807 return;
2810 cpu_physical_memory_write_rom_internal(&address_space_memory,
2811 start, NULL, len, FLUSH_CACHE);
2814 typedef struct {
2815 MemoryRegion *mr;
2816 void *buffer;
2817 hwaddr addr;
2818 hwaddr len;
2819 bool in_use;
2820 } BounceBuffer;
2822 static BounceBuffer bounce;
2824 typedef struct MapClient {
2825 QEMUBH *bh;
2826 QLIST_ENTRY(MapClient) link;
2827 } MapClient;
2829 QemuMutex map_client_list_lock;
2830 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2831 = QLIST_HEAD_INITIALIZER(map_client_list);
2833 static void cpu_unregister_map_client_do(MapClient *client)
2835 QLIST_REMOVE(client, link);
2836 g_free(client);
2839 static void cpu_notify_map_clients_locked(void)
2841 MapClient *client;
2843 while (!QLIST_EMPTY(&map_client_list)) {
2844 client = QLIST_FIRST(&map_client_list);
2845 qemu_bh_schedule(client->bh);
2846 cpu_unregister_map_client_do(client);
2850 void cpu_register_map_client(QEMUBH *bh)
2852 MapClient *client = g_malloc(sizeof(*client));
2854 qemu_mutex_lock(&map_client_list_lock);
2855 client->bh = bh;
2856 QLIST_INSERT_HEAD(&map_client_list, client, link);
2857 if (!atomic_read(&bounce.in_use)) {
2858 cpu_notify_map_clients_locked();
2860 qemu_mutex_unlock(&map_client_list_lock);
2863 void cpu_exec_init_all(void)
2865 qemu_mutex_init(&ram_list.mutex);
2866 io_mem_init();
2867 memory_map_init();
2868 qemu_mutex_init(&map_client_list_lock);
2871 void cpu_unregister_map_client(QEMUBH *bh)
2873 MapClient *client;
2875 qemu_mutex_lock(&map_client_list_lock);
2876 QLIST_FOREACH(client, &map_client_list, link) {
2877 if (client->bh == bh) {
2878 cpu_unregister_map_client_do(client);
2879 break;
2882 qemu_mutex_unlock(&map_client_list_lock);
2885 static void cpu_notify_map_clients(void)
2887 qemu_mutex_lock(&map_client_list_lock);
2888 cpu_notify_map_clients_locked();
2889 qemu_mutex_unlock(&map_client_list_lock);
2892 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2894 MemoryRegion *mr;
2895 hwaddr l, xlat;
2897 rcu_read_lock();
2898 while (len > 0) {
2899 l = len;
2900 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2901 if (!memory_access_is_direct(mr, is_write)) {
2902 l = memory_access_size(mr, l, addr);
2903 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2904 return false;
2908 len -= l;
2909 addr += l;
2911 rcu_read_unlock();
2912 return true;
2915 /* Map a physical memory region into a host virtual address.
2916 * May map a subset of the requested range, given by and returned in *plen.
2917 * May return NULL if resources needed to perform the mapping are exhausted.
2918 * Use only for reads OR writes - not for read-modify-write operations.
2919 * Use cpu_register_map_client() to know when retrying the map operation is
2920 * likely to succeed.
2922 void *address_space_map(AddressSpace *as,
2923 hwaddr addr,
2924 hwaddr *plen,
2925 bool is_write)
2927 hwaddr len = *plen;
2928 hwaddr done = 0;
2929 hwaddr l, xlat, base;
2930 MemoryRegion *mr, *this_mr;
2931 void *ptr;
2933 if (len == 0) {
2934 return NULL;
2937 l = len;
2938 rcu_read_lock();
2939 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2941 if (!memory_access_is_direct(mr, is_write)) {
2942 if (atomic_xchg(&bounce.in_use, true)) {
2943 rcu_read_unlock();
2944 return NULL;
2946 /* Avoid unbounded allocations */
2947 l = MIN(l, TARGET_PAGE_SIZE);
2948 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2949 bounce.addr = addr;
2950 bounce.len = l;
2952 memory_region_ref(mr);
2953 bounce.mr = mr;
2954 if (!is_write) {
2955 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2956 bounce.buffer, l);
2959 rcu_read_unlock();
2960 *plen = l;
2961 return bounce.buffer;
2964 base = xlat;
2966 for (;;) {
2967 len -= l;
2968 addr += l;
2969 done += l;
2970 if (len == 0) {
2971 break;
2974 l = len;
2975 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2976 if (this_mr != mr || xlat != base + done) {
2977 break;
2981 memory_region_ref(mr);
2982 *plen = done;
2983 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2984 rcu_read_unlock();
2986 return ptr;
2989 /* Unmaps a memory region previously mapped by address_space_map().
2990 * Will also mark the memory as dirty if is_write == 1. access_len gives
2991 * the amount of memory that was actually read or written by the caller.
2993 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2994 int is_write, hwaddr access_len)
2996 if (buffer != bounce.buffer) {
2997 MemoryRegion *mr;
2998 ram_addr_t addr1;
3000 mr = memory_region_from_host(buffer, &addr1);
3001 assert(mr != NULL);
3002 if (is_write) {
3003 invalidate_and_set_dirty(mr, addr1, access_len);
3005 if (xen_enabled()) {
3006 xen_invalidate_map_cache_entry(buffer);
3008 memory_region_unref(mr);
3009 return;
3011 if (is_write) {
3012 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3013 bounce.buffer, access_len);
3015 qemu_vfree(bounce.buffer);
3016 bounce.buffer = NULL;
3017 memory_region_unref(bounce.mr);
3018 atomic_mb_set(&bounce.in_use, false);
3019 cpu_notify_map_clients();
3022 void *cpu_physical_memory_map(hwaddr addr,
3023 hwaddr *plen,
3024 int is_write)
3026 return address_space_map(&address_space_memory, addr, plen, is_write);
3029 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3030 int is_write, hwaddr access_len)
3032 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3035 /* warning: addr must be aligned */
3036 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3037 MemTxAttrs attrs,
3038 MemTxResult *result,
3039 enum device_endian endian)
3041 uint8_t *ptr;
3042 uint64_t val;
3043 MemoryRegion *mr;
3044 hwaddr l = 4;
3045 hwaddr addr1;
3046 MemTxResult r;
3047 bool release_lock = false;
3049 rcu_read_lock();
3050 mr = address_space_translate(as, addr, &addr1, &l, false);
3051 if (l < 4 || !memory_access_is_direct(mr, false)) {
3052 release_lock |= prepare_mmio_access(mr);
3054 /* I/O case */
3055 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3056 #if defined(TARGET_WORDS_BIGENDIAN)
3057 if (endian == DEVICE_LITTLE_ENDIAN) {
3058 val = bswap32(val);
3060 #else
3061 if (endian == DEVICE_BIG_ENDIAN) {
3062 val = bswap32(val);
3064 #endif
3065 } else {
3066 /* RAM case */
3067 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3068 switch (endian) {
3069 case DEVICE_LITTLE_ENDIAN:
3070 val = ldl_le_p(ptr);
3071 break;
3072 case DEVICE_BIG_ENDIAN:
3073 val = ldl_be_p(ptr);
3074 break;
3075 default:
3076 val = ldl_p(ptr);
3077 break;
3079 r = MEMTX_OK;
3081 if (result) {
3082 *result = r;
3084 if (release_lock) {
3085 qemu_mutex_unlock_iothread();
3087 rcu_read_unlock();
3088 return val;
3091 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3092 MemTxAttrs attrs, MemTxResult *result)
3094 return address_space_ldl_internal(as, addr, attrs, result,
3095 DEVICE_NATIVE_ENDIAN);
3098 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3099 MemTxAttrs attrs, MemTxResult *result)
3101 return address_space_ldl_internal(as, addr, attrs, result,
3102 DEVICE_LITTLE_ENDIAN);
3105 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3106 MemTxAttrs attrs, MemTxResult *result)
3108 return address_space_ldl_internal(as, addr, attrs, result,
3109 DEVICE_BIG_ENDIAN);
3112 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3114 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3117 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3119 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3122 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3124 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3127 /* warning: addr must be aligned */
3128 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3129 MemTxAttrs attrs,
3130 MemTxResult *result,
3131 enum device_endian endian)
3133 uint8_t *ptr;
3134 uint64_t val;
3135 MemoryRegion *mr;
3136 hwaddr l = 8;
3137 hwaddr addr1;
3138 MemTxResult r;
3139 bool release_lock = false;
3141 rcu_read_lock();
3142 mr = address_space_translate(as, addr, &addr1, &l,
3143 false);
3144 if (l < 8 || !memory_access_is_direct(mr, false)) {
3145 release_lock |= prepare_mmio_access(mr);
3147 /* I/O case */
3148 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3149 #if defined(TARGET_WORDS_BIGENDIAN)
3150 if (endian == DEVICE_LITTLE_ENDIAN) {
3151 val = bswap64(val);
3153 #else
3154 if (endian == DEVICE_BIG_ENDIAN) {
3155 val = bswap64(val);
3157 #endif
3158 } else {
3159 /* RAM case */
3160 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3161 switch (endian) {
3162 case DEVICE_LITTLE_ENDIAN:
3163 val = ldq_le_p(ptr);
3164 break;
3165 case DEVICE_BIG_ENDIAN:
3166 val = ldq_be_p(ptr);
3167 break;
3168 default:
3169 val = ldq_p(ptr);
3170 break;
3172 r = MEMTX_OK;
3174 if (result) {
3175 *result = r;
3177 if (release_lock) {
3178 qemu_mutex_unlock_iothread();
3180 rcu_read_unlock();
3181 return val;
3184 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3185 MemTxAttrs attrs, MemTxResult *result)
3187 return address_space_ldq_internal(as, addr, attrs, result,
3188 DEVICE_NATIVE_ENDIAN);
3191 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3192 MemTxAttrs attrs, MemTxResult *result)
3194 return address_space_ldq_internal(as, addr, attrs, result,
3195 DEVICE_LITTLE_ENDIAN);
3198 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3199 MemTxAttrs attrs, MemTxResult *result)
3201 return address_space_ldq_internal(as, addr, attrs, result,
3202 DEVICE_BIG_ENDIAN);
3205 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3207 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3210 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3212 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3215 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3217 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3220 /* XXX: optimize */
3221 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3222 MemTxAttrs attrs, MemTxResult *result)
3224 uint8_t val;
3225 MemTxResult r;
3227 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3228 if (result) {
3229 *result = r;
3231 return val;
3234 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3236 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3239 /* warning: addr must be aligned */
3240 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3241 hwaddr addr,
3242 MemTxAttrs attrs,
3243 MemTxResult *result,
3244 enum device_endian endian)
3246 uint8_t *ptr;
3247 uint64_t val;
3248 MemoryRegion *mr;
3249 hwaddr l = 2;
3250 hwaddr addr1;
3251 MemTxResult r;
3252 bool release_lock = false;
3254 rcu_read_lock();
3255 mr = address_space_translate(as, addr, &addr1, &l,
3256 false);
3257 if (l < 2 || !memory_access_is_direct(mr, false)) {
3258 release_lock |= prepare_mmio_access(mr);
3260 /* I/O case */
3261 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3262 #if defined(TARGET_WORDS_BIGENDIAN)
3263 if (endian == DEVICE_LITTLE_ENDIAN) {
3264 val = bswap16(val);
3266 #else
3267 if (endian == DEVICE_BIG_ENDIAN) {
3268 val = bswap16(val);
3270 #endif
3271 } else {
3272 /* RAM case */
3273 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3274 switch (endian) {
3275 case DEVICE_LITTLE_ENDIAN:
3276 val = lduw_le_p(ptr);
3277 break;
3278 case DEVICE_BIG_ENDIAN:
3279 val = lduw_be_p(ptr);
3280 break;
3281 default:
3282 val = lduw_p(ptr);
3283 break;
3285 r = MEMTX_OK;
3287 if (result) {
3288 *result = r;
3290 if (release_lock) {
3291 qemu_mutex_unlock_iothread();
3293 rcu_read_unlock();
3294 return val;
3297 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3298 MemTxAttrs attrs, MemTxResult *result)
3300 return address_space_lduw_internal(as, addr, attrs, result,
3301 DEVICE_NATIVE_ENDIAN);
3304 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3305 MemTxAttrs attrs, MemTxResult *result)
3307 return address_space_lduw_internal(as, addr, attrs, result,
3308 DEVICE_LITTLE_ENDIAN);
3311 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3312 MemTxAttrs attrs, MemTxResult *result)
3314 return address_space_lduw_internal(as, addr, attrs, result,
3315 DEVICE_BIG_ENDIAN);
3318 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3320 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3323 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3325 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3328 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3330 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3333 /* warning: addr must be aligned. The ram page is not masked as dirty
3334 and the code inside is not invalidated. It is useful if the dirty
3335 bits are used to track modified PTEs */
3336 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3337 MemTxAttrs attrs, MemTxResult *result)
3339 uint8_t *ptr;
3340 MemoryRegion *mr;
3341 hwaddr l = 4;
3342 hwaddr addr1;
3343 MemTxResult r;
3344 uint8_t dirty_log_mask;
3345 bool release_lock = false;
3347 rcu_read_lock();
3348 mr = address_space_translate(as, addr, &addr1, &l,
3349 true);
3350 if (l < 4 || !memory_access_is_direct(mr, true)) {
3351 release_lock |= prepare_mmio_access(mr);
3353 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3354 } else {
3355 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3356 stl_p(ptr, val);
3358 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3359 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3360 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3361 4, dirty_log_mask);
3362 r = MEMTX_OK;
3364 if (result) {
3365 *result = r;
3367 if (release_lock) {
3368 qemu_mutex_unlock_iothread();
3370 rcu_read_unlock();
3373 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3375 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3378 /* warning: addr must be aligned */
3379 static inline void address_space_stl_internal(AddressSpace *as,
3380 hwaddr addr, uint32_t val,
3381 MemTxAttrs attrs,
3382 MemTxResult *result,
3383 enum device_endian endian)
3385 uint8_t *ptr;
3386 MemoryRegion *mr;
3387 hwaddr l = 4;
3388 hwaddr addr1;
3389 MemTxResult r;
3390 bool release_lock = false;
3392 rcu_read_lock();
3393 mr = address_space_translate(as, addr, &addr1, &l,
3394 true);
3395 if (l < 4 || !memory_access_is_direct(mr, true)) {
3396 release_lock |= prepare_mmio_access(mr);
3398 #if defined(TARGET_WORDS_BIGENDIAN)
3399 if (endian == DEVICE_LITTLE_ENDIAN) {
3400 val = bswap32(val);
3402 #else
3403 if (endian == DEVICE_BIG_ENDIAN) {
3404 val = bswap32(val);
3406 #endif
3407 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3408 } else {
3409 /* RAM case */
3410 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3411 switch (endian) {
3412 case DEVICE_LITTLE_ENDIAN:
3413 stl_le_p(ptr, val);
3414 break;
3415 case DEVICE_BIG_ENDIAN:
3416 stl_be_p(ptr, val);
3417 break;
3418 default:
3419 stl_p(ptr, val);
3420 break;
3422 invalidate_and_set_dirty(mr, addr1, 4);
3423 r = MEMTX_OK;
3425 if (result) {
3426 *result = r;
3428 if (release_lock) {
3429 qemu_mutex_unlock_iothread();
3431 rcu_read_unlock();
3434 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3435 MemTxAttrs attrs, MemTxResult *result)
3437 address_space_stl_internal(as, addr, val, attrs, result,
3438 DEVICE_NATIVE_ENDIAN);
3441 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3442 MemTxAttrs attrs, MemTxResult *result)
3444 address_space_stl_internal(as, addr, val, attrs, result,
3445 DEVICE_LITTLE_ENDIAN);
3448 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3449 MemTxAttrs attrs, MemTxResult *result)
3451 address_space_stl_internal(as, addr, val, attrs, result,
3452 DEVICE_BIG_ENDIAN);
3455 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3457 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3460 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3462 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3465 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3467 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3470 /* XXX: optimize */
3471 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3472 MemTxAttrs attrs, MemTxResult *result)
3474 uint8_t v = val;
3475 MemTxResult r;
3477 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3478 if (result) {
3479 *result = r;
3483 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3485 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3488 /* warning: addr must be aligned */
3489 static inline void address_space_stw_internal(AddressSpace *as,
3490 hwaddr addr, uint32_t val,
3491 MemTxAttrs attrs,
3492 MemTxResult *result,
3493 enum device_endian endian)
3495 uint8_t *ptr;
3496 MemoryRegion *mr;
3497 hwaddr l = 2;
3498 hwaddr addr1;
3499 MemTxResult r;
3500 bool release_lock = false;
3502 rcu_read_lock();
3503 mr = address_space_translate(as, addr, &addr1, &l, true);
3504 if (l < 2 || !memory_access_is_direct(mr, true)) {
3505 release_lock |= prepare_mmio_access(mr);
3507 #if defined(TARGET_WORDS_BIGENDIAN)
3508 if (endian == DEVICE_LITTLE_ENDIAN) {
3509 val = bswap16(val);
3511 #else
3512 if (endian == DEVICE_BIG_ENDIAN) {
3513 val = bswap16(val);
3515 #endif
3516 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3517 } else {
3518 /* RAM case */
3519 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3520 switch (endian) {
3521 case DEVICE_LITTLE_ENDIAN:
3522 stw_le_p(ptr, val);
3523 break;
3524 case DEVICE_BIG_ENDIAN:
3525 stw_be_p(ptr, val);
3526 break;
3527 default:
3528 stw_p(ptr, val);
3529 break;
3531 invalidate_and_set_dirty(mr, addr1, 2);
3532 r = MEMTX_OK;
3534 if (result) {
3535 *result = r;
3537 if (release_lock) {
3538 qemu_mutex_unlock_iothread();
3540 rcu_read_unlock();
3543 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3544 MemTxAttrs attrs, MemTxResult *result)
3546 address_space_stw_internal(as, addr, val, attrs, result,
3547 DEVICE_NATIVE_ENDIAN);
3550 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3551 MemTxAttrs attrs, MemTxResult *result)
3553 address_space_stw_internal(as, addr, val, attrs, result,
3554 DEVICE_LITTLE_ENDIAN);
3557 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3558 MemTxAttrs attrs, MemTxResult *result)
3560 address_space_stw_internal(as, addr, val, attrs, result,
3561 DEVICE_BIG_ENDIAN);
3564 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3566 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3569 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3571 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3574 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3576 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3579 /* XXX: optimize */
3580 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3581 MemTxAttrs attrs, MemTxResult *result)
3583 MemTxResult r;
3584 val = tswap64(val);
3585 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3586 if (result) {
3587 *result = r;
3591 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3592 MemTxAttrs attrs, MemTxResult *result)
3594 MemTxResult r;
3595 val = cpu_to_le64(val);
3596 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3597 if (result) {
3598 *result = r;
3601 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3602 MemTxAttrs attrs, MemTxResult *result)
3604 MemTxResult r;
3605 val = cpu_to_be64(val);
3606 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3607 if (result) {
3608 *result = r;
3612 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3614 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3617 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3619 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3622 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3624 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3627 /* virtual memory access for debug (includes writing to ROM) */
3628 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3629 uint8_t *buf, int len, int is_write)
3631 int l;
3632 hwaddr phys_addr;
3633 target_ulong page;
3635 while (len > 0) {
3636 int asidx;
3637 MemTxAttrs attrs;
3639 page = addr & TARGET_PAGE_MASK;
3640 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3641 asidx = cpu_asidx_from_attrs(cpu, attrs);
3642 /* if no physical page mapped, return an error */
3643 if (phys_addr == -1)
3644 return -1;
3645 l = (page + TARGET_PAGE_SIZE) - addr;
3646 if (l > len)
3647 l = len;
3648 phys_addr += (addr & ~TARGET_PAGE_MASK);
3649 if (is_write) {
3650 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3651 phys_addr, buf, l);
3652 } else {
3653 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3654 MEMTXATTRS_UNSPECIFIED,
3655 buf, l, 0);
3657 len -= l;
3658 buf += l;
3659 addr += l;
3661 return 0;
3665 * Allows code that needs to deal with migration bitmaps etc to still be built
3666 * target independent.
3668 size_t qemu_target_page_bits(void)
3670 return TARGET_PAGE_BITS;
3673 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3689 #ifndef CONFIG_USER_ONLY
3690 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3692 MemoryRegion*mr;
3693 hwaddr l = 1;
3694 bool res;
3696 rcu_read_lock();
3697 mr = address_space_translate(&address_space_memory,
3698 phys_addr, &phys_addr, &l, false);
3700 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3701 rcu_read_unlock();
3702 return res;
3705 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3707 RAMBlock *block;
3708 int ret = 0;
3710 rcu_read_lock();
3711 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3712 ret = func(block->idstr, block->host, block->offset,
3713 block->used_length, opaque);
3714 if (ret) {
3715 break;
3718 rcu_read_unlock();
3719 return ret;
3721 #endif