Merge remote-tracking branch 'fam/tags/for-upstream' into staging
[qemu/kevin.git] / exec.c
blob3d867f166cc6930b7073bd2d1f17b15faf41ad69
/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
#ifdef TARGET_PAGE_BITS_VARY
/* Chosen page-size exponent; 0 means "not chosen yet". */
int target_page_bits;
/* Set once the value above may no longer be changed. */
bool target_page_bits_decided;
#endif
101 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
102 /* current CPU in the current thread. It is only valid inside
103 cpu_exec() */
104 __thread CPUState *current_cpu;
105 /* 0 = Do not count executed instructions.
106 1 = Precise instruction counting.
107 2 = Adaptive rate instruction counting. */
108 int use_icount;
/* Request a target page size of 2^bits bytes.
 *
 * Returns true if the request is compatible with the current setting
 * (always the case when TARGET_PAGE_BITS_VARY is not defined), false if
 * a smaller size was requested after the size had already been decided.
 */
bool set_preferred_target_page_bits(int bits)
{
    /* The target page size is the lowest common denominator for all
     * the CPUs in the system, so we can only make it smaller, never
     * larger. And we can't make it smaller once we've committed to
     * a particular size.
     */
#ifdef TARGET_PAGE_BITS_VARY
    assert(bits >= TARGET_PAGE_BITS_MIN);
    if (target_page_bits == 0 || target_page_bits > bits) {
        if (target_page_bits_decided) {
            return false;
        }
        target_page_bits = bits;
    }
#endif
    return true;
}
129 #if !defined(CONFIG_USER_ONLY)
/* Commit to a target page size: fall back to TARGET_PAGE_BITS_MIN if
 * nothing was requested, then mark the value as decided.  No-op when
 * TARGET_PAGE_BITS_VARY is not defined.
 */
static void finalize_target_page_bits(void)
{
#ifdef TARGET_PAGE_BITS_VARY
    if (target_page_bits == 0) {
        target_page_bits = TARGET_PAGE_BITS_MIN;
    }
    target_page_bits_decided = true;
#endif
}
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page radix tree. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
150 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
152 /* Size of the L2 (and L3, etc) page tables. */
153 #define ADDR_SPACE_BITS 64
155 #define P_L2_BITS 9
156 #define P_L2_SIZE (1 << P_L2_BITS)
158 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
160 typedef PhysPageEntry Node[P_L2_SIZE];
162 typedef struct PhysPageMap {
163 struct rcu_head rcu;
165 unsigned sections_nb;
166 unsigned sections_nb_alloc;
167 unsigned nodes_nb;
168 unsigned nodes_nb_alloc;
169 Node *nodes;
170 MemoryRegionSection *sections;
171 } PhysPageMap;
173 struct AddressSpaceDispatch {
174 struct rcu_head rcu;
176 MemoryRegionSection *mru_section;
177 /* This is a multi-level map on the physical address space.
178 * The bottom level has pointers to MemoryRegionSections.
180 PhysPageEntry phys_map;
181 PhysPageMap map;
182 AddressSpace *as;
185 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
186 typedef struct subpage_t {
187 MemoryRegion iomem;
188 AddressSpace *as;
189 hwaddr base;
190 uint16_t sub_section[];
191 } subpage_t;
193 #define PHYS_SECTION_UNASSIGNED 0
194 #define PHYS_SECTION_NOTDIRTY 1
195 #define PHYS_SECTION_ROM 2
196 #define PHYS_SECTION_WATCH 3
198 static void io_mem_init(void);
199 static void memory_map_init(void);
200 static void tcg_commit(MemoryListener *listener);
202 static MemoryRegion io_mem_watch;
205 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
206 * @cpu: the CPU whose AddressSpace this is
207 * @as: the AddressSpace itself
208 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
209 * @tcg_as_listener: listener for tracking changes to the AddressSpace
211 struct CPUAddressSpace {
212 CPUState *cpu;
213 AddressSpace *as;
214 struct AddressSpaceDispatch *memory_dispatch;
215 MemoryListener tcg_as_listener;
218 #endif
220 #if !defined(CONFIG_USER_ONLY)
222 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
224 static unsigned alloc_hint = 16;
225 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
226 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
228 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
229 alloc_hint = map->nodes_nb_alloc;
233 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
235 unsigned i;
236 uint32_t ret;
237 PhysPageEntry e;
238 PhysPageEntry *p;
240 ret = map->nodes_nb++;
241 p = map->nodes[ret];
242 assert(ret != PHYS_MAP_NODE_NIL);
243 assert(ret != map->nodes_nb_alloc);
245 e.skip = leaf ? 0 : 1;
246 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
247 for (i = 0; i < P_L2_SIZE; ++i) {
248 memcpy(&p[i], &e, sizeof(e));
250 return ret;
253 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
254 hwaddr *index, hwaddr *nb, uint16_t leaf,
255 int level)
257 PhysPageEntry *p;
258 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
260 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
261 lp->ptr = phys_map_node_alloc(map, level == 0);
263 p = map->nodes[lp->ptr];
264 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
266 while (*nb && lp < &p[P_L2_SIZE]) {
267 if ((*index & (step - 1)) == 0 && *nb >= step) {
268 lp->skip = 0;
269 lp->ptr = leaf;
270 *index += step;
271 *nb -= step;
272 } else {
273 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
275 ++lp;
279 static void phys_page_set(AddressSpaceDispatch *d,
280 hwaddr index, hwaddr nb,
281 uint16_t leaf)
283 /* Wildly overreserve - it doesn't matter much. */
284 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
286 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
289 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
290 * and update our entry so we can skip it and go directly to the destination.
292 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
294 unsigned valid_ptr = P_L2_SIZE;
295 int valid = 0;
296 PhysPageEntry *p;
297 int i;
299 if (lp->ptr == PHYS_MAP_NODE_NIL) {
300 return;
303 p = nodes[lp->ptr];
304 for (i = 0; i < P_L2_SIZE; i++) {
305 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
306 continue;
309 valid_ptr = i;
310 valid++;
311 if (p[i].skip) {
312 phys_page_compact(&p[i], nodes);
316 /* We can only compress if there's only one child. */
317 if (valid != 1) {
318 return;
321 assert(valid_ptr < P_L2_SIZE);
323 /* Don't compress if it won't fit in the # of bits we have. */
324 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
325 return;
328 lp->ptr = p[valid_ptr].ptr;
329 if (!p[valid_ptr].skip) {
330 /* If our only child is a leaf, make this a leaf. */
331 /* By design, we should have made this node a leaf to begin with so we
332 * should never reach here.
333 * But since it's so simple to handle this, let's do it just in case we
334 * change this rule.
336 lp->skip = 0;
337 } else {
338 lp->skip += p[valid_ptr].skip;
342 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
344 if (d->phys_map.skip) {
345 phys_page_compact(&d->phys_map, d->map.nodes);
349 static inline bool section_covers_addr(const MemoryRegionSection *section,
350 hwaddr addr)
352 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
353 * the section must cover the entire address space.
355 return int128_gethi(section->size) ||
356 range_covers_byte(section->offset_within_address_space,
357 int128_getlo(section->size), addr);
360 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
361 Node *nodes, MemoryRegionSection *sections)
363 PhysPageEntry *p;
364 hwaddr index = addr >> TARGET_PAGE_BITS;
365 int i;
367 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
368 if (lp.ptr == PHYS_MAP_NODE_NIL) {
369 return &sections[PHYS_SECTION_UNASSIGNED];
371 p = nodes[lp.ptr];
372 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
375 if (section_covers_addr(&sections[lp.ptr], addr)) {
376 return &sections[lp.ptr];
377 } else {
378 return &sections[PHYS_SECTION_UNASSIGNED];
382 bool memory_region_is_unassigned(MemoryRegion *mr)
384 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
385 && mr != &io_mem_watch;
388 /* Called from RCU critical section */
389 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
390 hwaddr addr,
391 bool resolve_subpage)
393 MemoryRegionSection *section = atomic_read(&d->mru_section);
394 subpage_t *subpage;
395 bool update;
397 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
398 section_covers_addr(section, addr)) {
399 update = false;
400 } else {
401 section = phys_page_find(d->phys_map, addr, d->map.nodes,
402 d->map.sections);
403 update = true;
405 if (resolve_subpage && section->mr->subpage) {
406 subpage = container_of(section->mr, subpage_t, iomem);
407 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
409 if (update) {
410 atomic_set(&d->mru_section, section);
412 return section;
415 /* Called from RCU critical section */
416 static MemoryRegionSection *
417 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
418 hwaddr *plen, bool resolve_subpage)
420 MemoryRegionSection *section;
421 MemoryRegion *mr;
422 Int128 diff;
424 section = address_space_lookup_region(d, addr, resolve_subpage);
425 /* Compute offset within MemoryRegionSection */
426 addr -= section->offset_within_address_space;
428 /* Compute offset within MemoryRegion */
429 *xlat = addr + section->offset_within_region;
431 mr = section->mr;
433 /* MMIO registers can be expected to perform full-width accesses based only
434 * on their address, without considering adjacent registers that could
435 * decode to completely different MemoryRegions. When such registers
436 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
437 * regions overlap wildly. For this reason we cannot clamp the accesses
438 * here.
440 * If the length is small (as is the case for address_space_ldl/stl),
441 * everything works fine. If the incoming length is large, however,
442 * the caller really has to do the clamping through memory_access_size.
444 if (memory_region_is_ram(mr)) {
445 diff = int128_sub(section->size, int128_make64(addr));
446 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
448 return section;
451 /* Called from RCU critical section */
452 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
453 hwaddr *xlat, hwaddr *plen,
454 bool is_write)
456 IOMMUTLBEntry iotlb;
457 MemoryRegionSection *section;
458 MemoryRegion *mr;
460 for (;;) {
461 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
462 section = address_space_translate_internal(d, addr, &addr, plen, true);
463 mr = section->mr;
465 if (!mr->iommu_ops) {
466 break;
469 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
470 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
471 | (addr & iotlb.addr_mask));
472 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
473 if (!(iotlb.perm & (1 << is_write))) {
474 mr = &io_mem_unassigned;
475 break;
478 as = iotlb.target_as;
481 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
482 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
483 *plen = MIN(page, *plen);
486 *xlat = addr;
487 return mr;
490 /* Called from RCU critical section */
491 MemoryRegionSection *
492 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
493 hwaddr *xlat, hwaddr *plen)
495 MemoryRegionSection *section;
496 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
498 section = address_space_translate_internal(d, addr, xlat, plen, false);
500 assert(!section->mr->iommu_ops);
501 return section;
503 #endif
505 #if !defined(CONFIG_USER_ONLY)
507 static int cpu_common_post_load(void *opaque, int version_id)
509 CPUState *cpu = opaque;
511 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
512 version_id is increased. */
513 cpu->interrupt_request &= ~0x01;
514 tlb_flush(cpu, 1);
516 return 0;
519 static int cpu_common_pre_load(void *opaque)
521 CPUState *cpu = opaque;
523 cpu->exception_index = -1;
525 return 0;
528 static bool cpu_common_exception_index_needed(void *opaque)
530 CPUState *cpu = opaque;
532 return tcg_enabled() && cpu->exception_index != -1;
535 static const VMStateDescription vmstate_cpu_common_exception_index = {
536 .name = "cpu_common/exception_index",
537 .version_id = 1,
538 .minimum_version_id = 1,
539 .needed = cpu_common_exception_index_needed,
540 .fields = (VMStateField[]) {
541 VMSTATE_INT32(exception_index, CPUState),
542 VMSTATE_END_OF_LIST()
546 static bool cpu_common_crash_occurred_needed(void *opaque)
548 CPUState *cpu = opaque;
550 return cpu->crash_occurred;
553 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
554 .name = "cpu_common/crash_occurred",
555 .version_id = 1,
556 .minimum_version_id = 1,
557 .needed = cpu_common_crash_occurred_needed,
558 .fields = (VMStateField[]) {
559 VMSTATE_BOOL(crash_occurred, CPUState),
560 VMSTATE_END_OF_LIST()
564 const VMStateDescription vmstate_cpu_common = {
565 .name = "cpu_common",
566 .version_id = 1,
567 .minimum_version_id = 1,
568 .pre_load = cpu_common_pre_load,
569 .post_load = cpu_common_post_load,
570 .fields = (VMStateField[]) {
571 VMSTATE_UINT32(halted, CPUState),
572 VMSTATE_UINT32(interrupt_request, CPUState),
573 VMSTATE_END_OF_LIST()
575 .subsections = (const VMStateDescription*[]) {
576 &vmstate_cpu_common_exception_index,
577 &vmstate_cpu_common_crash_occurred,
578 NULL
582 #endif
584 CPUState *qemu_get_cpu(int index)
586 CPUState *cpu;
588 CPU_FOREACH(cpu) {
589 if (cpu->cpu_index == index) {
590 return cpu;
594 return NULL;
597 #if !defined(CONFIG_USER_ONLY)
598 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
600 CPUAddressSpace *newas;
602 /* Target code should have set num_ases before calling us */
603 assert(asidx < cpu->num_ases);
605 if (asidx == 0) {
606 /* address space 0 gets the convenience alias */
607 cpu->as = as;
610 /* KVM cannot currently support multiple address spaces. */
611 assert(asidx == 0 || !kvm_enabled());
613 if (!cpu->cpu_ases) {
614 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
617 newas = &cpu->cpu_ases[asidx];
618 newas->cpu = cpu;
619 newas->as = as;
620 if (tcg_enabled()) {
621 newas->tcg_as_listener.commit = tcg_commit;
622 memory_listener_register(&newas->tcg_as_listener, as);
626 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
628 /* Return the AddressSpace corresponding to the specified index */
629 return cpu->cpu_ases[asidx].as;
631 #endif
633 void cpu_exec_unrealizefn(CPUState *cpu)
635 CPUClass *cc = CPU_GET_CLASS(cpu);
637 cpu_list_remove(cpu);
639 if (cc->vmsd != NULL) {
640 vmstate_unregister(NULL, cc->vmsd, cpu);
642 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
643 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
647 void cpu_exec_initfn(CPUState *cpu)
649 cpu->as = NULL;
650 cpu->num_ases = 0;
652 #ifndef CONFIG_USER_ONLY
653 cpu->thread_id = qemu_get_thread_id();
655 /* This is a softmmu CPU object, so create a property for it
656 * so users can wire up its memory. (This can't go in qom/cpu.c
657 * because that file is compiled only once for both user-mode
658 * and system builds.) The default if no link is set up is to use
659 * the system address space.
661 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
662 (Object **)&cpu->memory,
663 qdev_prop_allow_set_link_before_realize,
664 OBJ_PROP_LINK_UNREF_ON_RELEASE,
665 &error_abort);
666 cpu->memory = system_memory;
667 object_ref(OBJECT(cpu->memory));
668 #endif
671 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
673 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
675 cpu_list_add(cpu);
677 #ifndef CONFIG_USER_ONLY
678 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
679 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
681 if (cc->vmsd != NULL) {
682 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
684 #endif
687 #if defined(CONFIG_USER_ONLY)
688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
690 mmap_lock();
691 tb_lock();
692 tb_invalidate_phys_page_range(pc, pc + 1, 0);
693 tb_unlock();
694 mmap_unlock();
696 #else
697 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
699 MemTxAttrs attrs;
700 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
701 int asidx = cpu_asidx_from_attrs(cpu, attrs);
702 if (phys != -1) {
703 /* Locks grabbed by tb_invalidate_phys_addr */
704 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
705 phys | (pc & ~TARGET_PAGE_MASK));
708 #endif
710 #if defined(CONFIG_USER_ONLY)
711 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
716 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
717 int flags)
719 return -ENOSYS;
722 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
726 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
727 int flags, CPUWatchpoint **watchpoint)
729 return -ENOSYS;
731 #else
732 /* Add a watchpoint. */
733 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
734 int flags, CPUWatchpoint **watchpoint)
736 CPUWatchpoint *wp;
738 /* forbid ranges which are empty or run off the end of the address space */
739 if (len == 0 || (addr + len - 1) < addr) {
740 error_report("tried to set invalid watchpoint at %"
741 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
742 return -EINVAL;
744 wp = g_malloc(sizeof(*wp));
746 wp->vaddr = addr;
747 wp->len = len;
748 wp->flags = flags;
750 /* keep all GDB-injected watchpoints in front */
751 if (flags & BP_GDB) {
752 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
753 } else {
754 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
757 tlb_flush_page(cpu, addr);
759 if (watchpoint)
760 *watchpoint = wp;
761 return 0;
764 /* Remove a specific watchpoint. */
765 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
766 int flags)
768 CPUWatchpoint *wp;
770 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
771 if (addr == wp->vaddr && len == wp->len
772 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
773 cpu_watchpoint_remove_by_ref(cpu, wp);
774 return 0;
777 return -ENOENT;
780 /* Remove a specific watchpoint by reference. */
781 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
783 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
785 tlb_flush_page(cpu, watchpoint->vaddr);
787 g_free(watchpoint);
790 /* Remove all matching watchpoints. */
791 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
793 CPUWatchpoint *wp, *next;
795 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
796 if (wp->flags & mask) {
797 cpu_watchpoint_remove_by_ref(cpu, wp);
802 /* Return true if this watchpoint address matches the specified
803 * access (ie the address range covered by the watchpoint overlaps
804 * partially or completely with the address range covered by the
805 * access).
807 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
808 vaddr addr,
809 vaddr len)
811 /* We know the lengths are non-zero, but a little caution is
812 * required to avoid errors in the case where the range ends
813 * exactly at the top of the address space and so addr + len
814 * wraps round to zero.
816 vaddr wpend = wp->vaddr + wp->len - 1;
817 vaddr addrend = addr + len - 1;
819 return !(addr > wpend || wp->vaddr > addrend);
822 #endif
824 /* Add a breakpoint. */
825 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
826 CPUBreakpoint **breakpoint)
828 CPUBreakpoint *bp;
830 bp = g_malloc(sizeof(*bp));
832 bp->pc = pc;
833 bp->flags = flags;
835 /* keep all GDB-injected breakpoints in front */
836 if (flags & BP_GDB) {
837 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
838 } else {
839 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
842 breakpoint_invalidate(cpu, pc);
844 if (breakpoint) {
845 *breakpoint = bp;
847 return 0;
850 /* Remove a specific breakpoint. */
851 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
853 CPUBreakpoint *bp;
855 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
856 if (bp->pc == pc && bp->flags == flags) {
857 cpu_breakpoint_remove_by_ref(cpu, bp);
858 return 0;
861 return -ENOENT;
864 /* Remove a specific breakpoint by reference. */
865 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
867 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
869 breakpoint_invalidate(cpu, breakpoint->pc);
871 g_free(breakpoint);
874 /* Remove all matching breakpoints. */
875 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
877 CPUBreakpoint *bp, *next;
879 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
880 if (bp->flags & mask) {
881 cpu_breakpoint_remove_by_ref(cpu, bp);
886 /* enable or disable single step mode. EXCP_DEBUG is returned by the
887 CPU loop after each instruction */
888 void cpu_single_step(CPUState *cpu, int enabled)
890 if (cpu->singlestep_enabled != enabled) {
891 cpu->singlestep_enabled = enabled;
892 if (kvm_enabled()) {
893 kvm_update_guest_debug(cpu, 0);
894 } else {
895 /* must flush all the translated code to avoid inconsistencies */
896 /* XXX: only flush what is necessary */
897 tb_flush(cpu);
902 void cpu_abort(CPUState *cpu, const char *fmt, ...)
904 va_list ap;
905 va_list ap2;
907 va_start(ap, fmt);
908 va_copy(ap2, ap);
909 fprintf(stderr, "qemu: fatal: ");
910 vfprintf(stderr, fmt, ap);
911 fprintf(stderr, "\n");
912 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
913 if (qemu_log_separate()) {
914 qemu_log_lock();
915 qemu_log("qemu: fatal: ");
916 qemu_log_vprintf(fmt, ap2);
917 qemu_log("\n");
918 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
919 qemu_log_flush();
920 qemu_log_unlock();
921 qemu_log_close();
923 va_end(ap2);
924 va_end(ap);
925 replay_finish();
926 #if defined(CONFIG_USER_ONLY)
928 struct sigaction act;
929 sigfillset(&act.sa_mask);
930 act.sa_handler = SIG_DFL;
931 sigaction(SIGABRT, &act, NULL);
933 #endif
934 abort();
937 #if !defined(CONFIG_USER_ONLY)
938 /* Called from RCU critical section */
939 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
941 RAMBlock *block;
943 block = atomic_rcu_read(&ram_list.mru_block);
944 if (block && addr - block->offset < block->max_length) {
945 return block;
947 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
948 if (addr - block->offset < block->max_length) {
949 goto found;
953 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
954 abort();
956 found:
957 /* It is safe to write mru_block outside the iothread lock. This
958 * is what happens:
960 * mru_block = xxx
961 * rcu_read_unlock()
962 * xxx removed from list
963 * rcu_read_lock()
964 * read mru_block
965 * mru_block = NULL;
966 * call_rcu(reclaim_ramblock, xxx);
967 * rcu_read_unlock()
969 * atomic_rcu_set is not needed here. The block was already published
970 * when it was placed into the list. Here we're just making an extra
971 * copy of the pointer.
973 ram_list.mru_block = block;
974 return block;
977 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
979 CPUState *cpu;
980 ram_addr_t start1;
981 RAMBlock *block;
982 ram_addr_t end;
984 end = TARGET_PAGE_ALIGN(start + length);
985 start &= TARGET_PAGE_MASK;
987 rcu_read_lock();
988 block = qemu_get_ram_block(start);
989 assert(block == qemu_get_ram_block(end - 1));
990 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
991 CPU_FOREACH(cpu) {
992 tlb_reset_dirty(cpu, start1, length);
994 rcu_read_unlock();
997 /* Note: start and end must be within the same ram block. */
998 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
999 ram_addr_t length,
1000 unsigned client)
1002 DirtyMemoryBlocks *blocks;
1003 unsigned long end, page;
1004 bool dirty = false;
1006 if (length == 0) {
1007 return false;
1010 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1011 page = start >> TARGET_PAGE_BITS;
1013 rcu_read_lock();
1015 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1017 while (page < end) {
1018 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1019 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1020 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1022 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1023 offset, num);
1024 page += num;
1027 rcu_read_unlock();
1029 if (dirty && tcg_enabled()) {
1030 tlb_reset_dirty_range_all(start, length);
1033 return dirty;
1036 /* Called from RCU critical section */
1037 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1038 MemoryRegionSection *section,
1039 target_ulong vaddr,
1040 hwaddr paddr, hwaddr xlat,
1041 int prot,
1042 target_ulong *address)
1044 hwaddr iotlb;
1045 CPUWatchpoint *wp;
1047 if (memory_region_is_ram(section->mr)) {
1048 /* Normal RAM. */
1049 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1050 if (!section->readonly) {
1051 iotlb |= PHYS_SECTION_NOTDIRTY;
1052 } else {
1053 iotlb |= PHYS_SECTION_ROM;
1055 } else {
1056 AddressSpaceDispatch *d;
1058 d = atomic_rcu_read(&section->address_space->dispatch);
1059 iotlb = section - d->map.sections;
1060 iotlb += xlat;
1063 /* Make accesses to pages with watchpoints go via the
1064 watchpoint trap routines. */
1065 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1066 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1067 /* Avoid trapping reads of pages with a write breakpoint. */
1068 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1069 iotlb = PHYS_SECTION_WATCH + paddr;
1070 *address |= TLB_MMIO;
1071 break;
1076 return iotlb;
1078 #endif /* defined(CONFIG_USER_ONLY) */
1080 #if !defined(CONFIG_USER_ONLY)
1082 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1083 uint16_t section);
1084 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1086 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1087 qemu_anon_ram_alloc;
1090 * Set a custom physical guest memory alloator.
1091 * Accelerators with unusual needs may need this. Hopefully, we can
1092 * get rid of it eventually.
1094 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1096 phys_mem_alloc = alloc;
1099 static uint16_t phys_section_add(PhysPageMap *map,
1100 MemoryRegionSection *section)
1102 /* The physical section number is ORed with a page-aligned
1103 * pointer to produce the iotlb entries. Thus it should
1104 * never overflow into the page-aligned value.
1106 assert(map->sections_nb < TARGET_PAGE_SIZE);
1108 if (map->sections_nb == map->sections_nb_alloc) {
1109 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1110 map->sections = g_renew(MemoryRegionSection, map->sections,
1111 map->sections_nb_alloc);
1113 map->sections[map->sections_nb] = *section;
1114 memory_region_ref(section->mr);
1115 return map->sections_nb++;
1118 static void phys_section_destroy(MemoryRegion *mr)
1120 bool have_sub_page = mr->subpage;
1122 memory_region_unref(mr);
1124 if (have_sub_page) {
1125 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1126 object_unref(OBJECT(&subpage->iomem));
1127 g_free(subpage);
1131 static void phys_sections_free(PhysPageMap *map)
1133 while (map->sections_nb > 0) {
1134 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1135 phys_section_destroy(section->mr);
1137 g_free(map->sections);
1138 g_free(map->nodes);
1141 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1143 subpage_t *subpage;
1144 hwaddr base = section->offset_within_address_space
1145 & TARGET_PAGE_MASK;
1146 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1147 d->map.nodes, d->map.sections);
1148 MemoryRegionSection subsection = {
1149 .offset_within_address_space = base,
1150 .size = int128_make64(TARGET_PAGE_SIZE),
1152 hwaddr start, end;
1154 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1156 if (!(existing->mr->subpage)) {
1157 subpage = subpage_init(d->as, base);
1158 subsection.address_space = d->as;
1159 subsection.mr = &subpage->iomem;
1160 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1161 phys_section_add(&d->map, &subsection));
1162 } else {
1163 subpage = container_of(existing->mr, subpage_t, iomem);
1165 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1166 end = start + int128_get64(section->size) - 1;
1167 subpage_register(subpage, start, end,
1168 phys_section_add(&d->map, section));
1172 static void register_multipage(AddressSpaceDispatch *d,
1173 MemoryRegionSection *section)
1175 hwaddr start_addr = section->offset_within_address_space;
1176 uint16_t section_index = phys_section_add(&d->map, section);
1177 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1178 TARGET_PAGE_BITS));
1180 assert(num_pages);
1181 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1184 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1186 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1187 AddressSpaceDispatch *d = as->next_dispatch;
1188 MemoryRegionSection now = *section, remain = *section;
1189 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1191 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1192 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1193 - now.offset_within_address_space;
1195 now.size = int128_min(int128_make64(left), now.size);
1196 register_subpage(d, &now);
1197 } else {
1198 now.size = int128_zero();
1200 while (int128_ne(remain.size, now.size)) {
1201 remain.size = int128_sub(remain.size, now.size);
1202 remain.offset_within_address_space += int128_get64(now.size);
1203 remain.offset_within_region += int128_get64(now.size);
1204 now = remain;
1205 if (int128_lt(remain.size, page_size)) {
1206 register_subpage(d, &now);
1207 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1208 now.size = page_size;
1209 register_subpage(d, &now);
1210 } else {
1211 now.size = int128_and(now.size, int128_neg(page_size));
1212 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does anything when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1223 void qemu_mutex_lock_ramlist(void)
1225 qemu_mutex_lock(&ram_list.mutex);
1228 void qemu_mutex_unlock_ramlist(void)
1230 qemu_mutex_unlock(&ram_list.mutex);
1233 #ifdef __linux__
/*
 * Return the size of the file behind @fd, obtained by seeking to its end,
 * or a negative errno value if the seek fails.
 */
static int64_t get_file_size(int fd)
{
    int64_t end_pos = lseek(fd, 0, SEEK_END);

    return end_pos < 0 ? -errno : end_pos;
}
1243 static void *file_ram_alloc(RAMBlock *block,
1244 ram_addr_t memory,
1245 const char *path,
1246 Error **errp)
1248 bool unlink_on_error = false;
1249 char *filename;
1250 char *sanitized_name;
1251 char *c;
1252 void *area = MAP_FAILED;
1253 int fd = -1;
1254 int64_t file_size;
1256 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1257 error_setg(errp,
1258 "host lacks kvm mmu notifiers, -mem-path unsupported");
1259 return NULL;
1262 for (;;) {
1263 fd = open(path, O_RDWR);
1264 if (fd >= 0) {
1265 /* @path names an existing file, use it */
1266 break;
1268 if (errno == ENOENT) {
1269 /* @path names a file that doesn't exist, create it */
1270 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1271 if (fd >= 0) {
1272 unlink_on_error = true;
1273 break;
1275 } else if (errno == EISDIR) {
1276 /* @path names a directory, create a file there */
1277 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1278 sanitized_name = g_strdup(memory_region_name(block->mr));
1279 for (c = sanitized_name; *c != '\0'; c++) {
1280 if (*c == '/') {
1281 *c = '_';
1285 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1286 sanitized_name);
1287 g_free(sanitized_name);
1289 fd = mkstemp(filename);
1290 if (fd >= 0) {
1291 unlink(filename);
1292 g_free(filename);
1293 break;
1295 g_free(filename);
1297 if (errno != EEXIST && errno != EINTR) {
1298 error_setg_errno(errp, errno,
1299 "can't open backing store %s for guest RAM",
1300 path);
1301 goto error;
1304 * Try again on EINTR and EEXIST. The latter happens when
1305 * something else creates the file between our two open().
1309 block->page_size = qemu_fd_getpagesize(fd);
1310 block->mr->align = block->page_size;
1311 #if defined(__s390x__)
1312 if (kvm_enabled()) {
1313 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1315 #endif
1317 file_size = get_file_size(fd);
1319 if (memory < block->page_size) {
1320 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1321 "or larger than page size 0x%zx",
1322 memory, block->page_size);
1323 goto error;
1326 if (file_size > 0 && file_size < memory) {
1327 error_setg(errp, "backing store %s size 0x%" PRIx64
1328 " does not match 'size' option 0x" RAM_ADDR_FMT,
1329 path, file_size, memory);
1330 goto error;
1333 memory = ROUND_UP(memory, block->page_size);
1336 * ftruncate is not supported by hugetlbfs in older
1337 * hosts, so don't bother bailing out on errors.
1338 * If anything goes wrong with it under other filesystems,
1339 * mmap will fail.
1341 * Do not truncate the non-empty backend file to avoid corrupting
1342 * the existing data in the file. Disabling shrinking is not
1343 * enough. For example, the current vNVDIMM implementation stores
1344 * the guest NVDIMM labels at the end of the backend file. If the
1345 * backend file is later extended, QEMU will not be able to find
1346 * those labels. Therefore, extending the non-empty backend file
1347 * is disabled as well.
1349 if (!file_size && ftruncate(fd, memory)) {
1350 perror("ftruncate");
1353 area = qemu_ram_mmap(fd, memory, block->mr->align,
1354 block->flags & RAM_SHARED);
1355 if (area == MAP_FAILED) {
1356 error_setg_errno(errp, errno,
1357 "unable to map backing store for guest RAM");
1358 goto error;
1361 if (mem_prealloc) {
1362 os_mem_prealloc(fd, area, memory, errp);
1363 if (errp && *errp) {
1364 goto error;
1368 block->fd = fd;
1369 return area;
1371 error:
1372 if (area != MAP_FAILED) {
1373 qemu_ram_munmap(area, memory);
1375 if (unlink_on_error) {
1376 unlink(path);
1378 if (fd != -1) {
1379 close(fd);
1381 return NULL;
1383 #endif
1385 /* Called with the ramlist lock held. */
1386 static ram_addr_t find_ram_offset(ram_addr_t size)
1388 RAMBlock *block, *next_block;
1389 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1391 assert(size != 0); /* it would hand out same offset multiple times */
1393 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1394 return 0;
1397 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1398 ram_addr_t end, next = RAM_ADDR_MAX;
1400 end = block->offset + block->max_length;
1402 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1403 if (next_block->offset >= end) {
1404 next = MIN(next, next_block->offset);
1407 if (next - end >= size && next - end < mingap) {
1408 offset = end;
1409 mingap = next - end;
1413 if (offset == RAM_ADDR_MAX) {
1414 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1415 (uint64_t)size);
1416 abort();
1419 return offset;
1422 ram_addr_t last_ram_offset(void)
1424 RAMBlock *block;
1425 ram_addr_t last = 0;
1427 rcu_read_lock();
1428 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1429 last = MAX(last, block->offset + block->max_length);
1431 rcu_read_unlock();
1432 return last;
1435 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1437 int ret;
1439 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1440 if (!machine_dump_guest_core(current_machine)) {
1441 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1442 if (ret) {
1443 perror("qemu_madvise");
1444 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1445 "but dump_guest_core=off specified\n");
1450 const char *qemu_ram_get_idstr(RAMBlock *rb)
1452 return rb->idstr;
1455 /* Called with iothread lock held. */
1456 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1458 RAMBlock *block;
1460 assert(new_block);
1461 assert(!new_block->idstr[0]);
1463 if (dev) {
1464 char *id = qdev_get_dev_path(dev);
1465 if (id) {
1466 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1467 g_free(id);
1470 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1472 rcu_read_lock();
1473 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1474 if (block != new_block &&
1475 !strcmp(block->idstr, new_block->idstr)) {
1476 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1477 new_block->idstr);
1478 abort();
1481 rcu_read_unlock();
1484 /* Called with iothread lock held. */
1485 void qemu_ram_unset_idstr(RAMBlock *block)
1487 /* FIXME: arch_init.c assumes that this is not called throughout
1488 * migration. Ignore the problem since hot-unplug during migration
1489 * does not work anyway.
1491 if (block) {
1492 memset(block->idstr, 0, sizeof(block->idstr));
1496 size_t qemu_ram_pagesize(RAMBlock *rb)
1498 return rb->page_size;
1501 static int memory_try_enable_merging(void *addr, size_t len)
1503 if (!machine_mem_merge(current_machine)) {
1504 /* disabled by the user */
1505 return 0;
1508 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1511 /* Only legal before guest might have detected the memory size: e.g. on
1512 * incoming migration, or right after reset.
1514 * As memory core doesn't know how is memory accessed, it is up to
1515 * resize callback to update device state and/or add assertions to detect
1516 * misuse, if necessary.
1518 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1520 assert(block);
1522 newsize = HOST_PAGE_ALIGN(newsize);
1524 if (block->used_length == newsize) {
1525 return 0;
1528 if (!(block->flags & RAM_RESIZEABLE)) {
1529 error_setg_errno(errp, EINVAL,
1530 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1531 " in != 0x" RAM_ADDR_FMT, block->idstr,
1532 newsize, block->used_length);
1533 return -EINVAL;
1536 if (block->max_length < newsize) {
1537 error_setg_errno(errp, EINVAL,
1538 "Length too large: %s: 0x" RAM_ADDR_FMT
1539 " > 0x" RAM_ADDR_FMT, block->idstr,
1540 newsize, block->max_length);
1541 return -EINVAL;
1544 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1545 block->used_length = newsize;
1546 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1547 DIRTY_CLIENTS_ALL);
1548 memory_region_set_size(block->mr, newsize);
1549 if (block->resized) {
1550 block->resized(block->idstr, newsize, block->host);
1552 return 0;
1555 /* Called with ram_list.mutex held */
1556 static void dirty_memory_extend(ram_addr_t old_ram_size,
1557 ram_addr_t new_ram_size)
1559 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1560 DIRTY_MEMORY_BLOCK_SIZE);
1561 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1562 DIRTY_MEMORY_BLOCK_SIZE);
1563 int i;
1565 /* Only need to extend if block count increased */
1566 if (new_num_blocks <= old_num_blocks) {
1567 return;
1570 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1571 DirtyMemoryBlocks *old_blocks;
1572 DirtyMemoryBlocks *new_blocks;
1573 int j;
1575 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1576 new_blocks = g_malloc(sizeof(*new_blocks) +
1577 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1579 if (old_num_blocks) {
1580 memcpy(new_blocks->blocks, old_blocks->blocks,
1581 old_num_blocks * sizeof(old_blocks->blocks[0]));
1584 for (j = old_num_blocks; j < new_num_blocks; j++) {
1585 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1588 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1590 if (old_blocks) {
1591 g_free_rcu(old_blocks, rcu);
1596 static void ram_block_add(RAMBlock *new_block, Error **errp)
1598 RAMBlock *block;
1599 RAMBlock *last_block = NULL;
1600 ram_addr_t old_ram_size, new_ram_size;
1601 Error *err = NULL;
1603 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1605 qemu_mutex_lock_ramlist();
1606 new_block->offset = find_ram_offset(new_block->max_length);
1608 if (!new_block->host) {
1609 if (xen_enabled()) {
1610 xen_ram_alloc(new_block->offset, new_block->max_length,
1611 new_block->mr, &err);
1612 if (err) {
1613 error_propagate(errp, err);
1614 qemu_mutex_unlock_ramlist();
1615 return;
1617 } else {
1618 new_block->host = phys_mem_alloc(new_block->max_length,
1619 &new_block->mr->align);
1620 if (!new_block->host) {
1621 error_setg_errno(errp, errno,
1622 "cannot set up guest memory '%s'",
1623 memory_region_name(new_block->mr));
1624 qemu_mutex_unlock_ramlist();
1625 return;
1627 memory_try_enable_merging(new_block->host, new_block->max_length);
1631 new_ram_size = MAX(old_ram_size,
1632 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1633 if (new_ram_size > old_ram_size) {
1634 migration_bitmap_extend(old_ram_size, new_ram_size);
1635 dirty_memory_extend(old_ram_size, new_ram_size);
1637 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1638 * QLIST (which has an RCU-friendly variant) does not have insertion at
1639 * tail, so save the last element in last_block.
1641 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1642 last_block = block;
1643 if (block->max_length < new_block->max_length) {
1644 break;
1647 if (block) {
1648 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1649 } else if (last_block) {
1650 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1651 } else { /* list is empty */
1652 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1654 ram_list.mru_block = NULL;
1656 /* Write list before version */
1657 smp_wmb();
1658 ram_list.version++;
1659 qemu_mutex_unlock_ramlist();
1661 cpu_physical_memory_set_dirty_range(new_block->offset,
1662 new_block->used_length,
1663 DIRTY_CLIENTS_ALL);
1665 if (new_block->host) {
1666 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1667 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1668 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1669 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1673 #ifdef __linux__
1674 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1675 bool share, const char *mem_path,
1676 Error **errp)
1678 RAMBlock *new_block;
1679 Error *local_err = NULL;
1681 if (xen_enabled()) {
1682 error_setg(errp, "-mem-path not supported with Xen");
1683 return NULL;
1686 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1688 * file_ram_alloc() needs to allocate just like
1689 * phys_mem_alloc, but we haven't bothered to provide
1690 * a hook there.
1692 error_setg(errp,
1693 "-mem-path not supported with this accelerator");
1694 return NULL;
1697 size = HOST_PAGE_ALIGN(size);
1698 new_block = g_malloc0(sizeof(*new_block));
1699 new_block->mr = mr;
1700 new_block->used_length = size;
1701 new_block->max_length = size;
1702 new_block->flags = share ? RAM_SHARED : 0;
1703 new_block->host = file_ram_alloc(new_block, size,
1704 mem_path, errp);
1705 if (!new_block->host) {
1706 g_free(new_block);
1707 return NULL;
1710 ram_block_add(new_block, &local_err);
1711 if (local_err) {
1712 g_free(new_block);
1713 error_propagate(errp, local_err);
1714 return NULL;
1716 return new_block;
1718 #endif
1720 static
1721 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1722 void (*resized)(const char*,
1723 uint64_t length,
1724 void *host),
1725 void *host, bool resizeable,
1726 MemoryRegion *mr, Error **errp)
1728 RAMBlock *new_block;
1729 Error *local_err = NULL;
1731 size = HOST_PAGE_ALIGN(size);
1732 max_size = HOST_PAGE_ALIGN(max_size);
1733 new_block = g_malloc0(sizeof(*new_block));
1734 new_block->mr = mr;
1735 new_block->resized = resized;
1736 new_block->used_length = size;
1737 new_block->max_length = max_size;
1738 assert(max_size >= size);
1739 new_block->fd = -1;
1740 new_block->page_size = getpagesize();
1741 new_block->host = host;
1742 if (host) {
1743 new_block->flags |= RAM_PREALLOC;
1745 if (resizeable) {
1746 new_block->flags |= RAM_RESIZEABLE;
1748 ram_block_add(new_block, &local_err);
1749 if (local_err) {
1750 g_free(new_block);
1751 error_propagate(errp, local_err);
1752 return NULL;
1754 return new_block;
1757 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1758 MemoryRegion *mr, Error **errp)
1760 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1763 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1765 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1768 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1769 void (*resized)(const char*,
1770 uint64_t length,
1771 void *host),
1772 MemoryRegion *mr, Error **errp)
1774 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1777 static void reclaim_ramblock(RAMBlock *block)
1779 if (block->flags & RAM_PREALLOC) {
1781 } else if (xen_enabled()) {
1782 xen_invalidate_map_cache_entry(block->host);
1783 #ifndef _WIN32
1784 } else if (block->fd >= 0) {
1785 qemu_ram_munmap(block->host, block->max_length);
1786 close(block->fd);
1787 #endif
1788 } else {
1789 qemu_anon_ram_free(block->host, block->max_length);
1791 g_free(block);
1794 void qemu_ram_free(RAMBlock *block)
1796 if (!block) {
1797 return;
1800 qemu_mutex_lock_ramlist();
1801 QLIST_REMOVE_RCU(block, next);
1802 ram_list.mru_block = NULL;
1803 /* Write list before version */
1804 smp_wmb();
1805 ram_list.version++;
1806 call_rcu(block, reclaim_ramblock, rcu);
1807 qemu_mutex_unlock_ramlist();
1810 #ifndef _WIN32
1811 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1813 RAMBlock *block;
1814 ram_addr_t offset;
1815 int flags;
1816 void *area, *vaddr;
1818 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1819 offset = addr - block->offset;
1820 if (offset < block->max_length) {
1821 vaddr = ramblock_ptr(block, offset);
1822 if (block->flags & RAM_PREALLOC) {
1824 } else if (xen_enabled()) {
1825 abort();
1826 } else {
1827 flags = MAP_FIXED;
1828 if (block->fd >= 0) {
1829 flags |= (block->flags & RAM_SHARED ?
1830 MAP_SHARED : MAP_PRIVATE);
1831 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1832 flags, block->fd, offset);
1833 } else {
1835 * Remap needs to match alloc. Accelerators that
1836 * set phys_mem_alloc never remap. If they did,
1837 * we'd need a remap hook here.
1839 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1841 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1842 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1843 flags, -1, 0);
1845 if (area != vaddr) {
1846 fprintf(stderr, "Could not remap addr: "
1847 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1848 length, addr);
1849 exit(1);
1851 memory_try_enable_merging(vaddr, length);
1852 qemu_ram_setup_dump(vaddr, length);
1857 #endif /* !_WIN32 */
1859 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1860 * This should not be used for general purpose DMA. Use address_space_map
1861 * or address_space_rw instead. For local memory (e.g. video ram) that the
1862 * device owns, use memory_region_get_ram_ptr.
1864 * Called within RCU critical section.
1866 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1868 RAMBlock *block = ram_block;
1870 if (block == NULL) {
1871 block = qemu_get_ram_block(addr);
1872 addr -= block->offset;
1875 if (xen_enabled() && block->host == NULL) {
1876 /* We need to check if the requested address is in the RAM
1877 * because we don't want to map the entire memory in QEMU.
1878 * In that case just map until the end of the page.
1880 if (block->offset == 0) {
1881 return xen_map_cache(addr, 0, 0);
1884 block->host = xen_map_cache(block->offset, block->max_length, 1);
1886 return ramblock_ptr(block, addr);
1889 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1890 * but takes a size argument.
1892 * Called within RCU critical section.
1894 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1895 hwaddr *size)
1897 RAMBlock *block = ram_block;
1898 if (*size == 0) {
1899 return NULL;
1902 if (block == NULL) {
1903 block = qemu_get_ram_block(addr);
1904 addr -= block->offset;
1906 *size = MIN(*size, block->max_length - addr);
1908 if (xen_enabled() && block->host == NULL) {
1909 /* We need to check if the requested address is in the RAM
1910 * because we don't want to map the entire memory in QEMU.
1911 * In that case just map the requested area.
1913 if (block->offset == 0) {
1914 return xen_map_cache(addr, *size, 1);
1917 block->host = xen_map_cache(block->offset, block->max_length, 1);
1920 return ramblock_ptr(block, addr);
1924 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1925 * in that RAMBlock.
1927 * ptr: Host pointer to look up
1928 * round_offset: If true round the result offset down to a page boundary
1929 * *ram_addr: set to result ram_addr
1930 * *offset: set to result offset within the RAMBlock
1932 * Returns: RAMBlock (or NULL if not found)
1934 * By the time this function returns, the returned pointer is not protected
1935 * by RCU anymore. If the caller is not within an RCU critical section and
1936 * does not hold the iothread lock, it must have other means of protecting the
1937 * pointer, such as a reference to the region that includes the incoming
1938 * ram_addr_t.
1940 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1941 ram_addr_t *offset)
1943 RAMBlock *block;
1944 uint8_t *host = ptr;
1946 if (xen_enabled()) {
1947 ram_addr_t ram_addr;
1948 rcu_read_lock();
1949 ram_addr = xen_ram_addr_from_mapcache(ptr);
1950 block = qemu_get_ram_block(ram_addr);
1951 if (block) {
1952 *offset = ram_addr - block->offset;
1954 rcu_read_unlock();
1955 return block;
1958 rcu_read_lock();
1959 block = atomic_rcu_read(&ram_list.mru_block);
1960 if (block && block->host && host - block->host < block->max_length) {
1961 goto found;
1964 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1965 /* This case append when the block is not mapped. */
1966 if (block->host == NULL) {
1967 continue;
1969 if (host - block->host < block->max_length) {
1970 goto found;
1974 rcu_read_unlock();
1975 return NULL;
1977 found:
1978 *offset = (host - block->host);
1979 if (round_offset) {
1980 *offset &= TARGET_PAGE_MASK;
1982 rcu_read_unlock();
1983 return block;
1987 * Finds the named RAMBlock
1989 * name: The name of RAMBlock to find
1991 * Returns: RAMBlock (or NULL if not found)
1993 RAMBlock *qemu_ram_block_by_name(const char *name)
1995 RAMBlock *block;
1997 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1998 if (!strcmp(name, block->idstr)) {
1999 return block;
2003 return NULL;
2006 /* Some of the softmmu routines need to translate from a host pointer
2007 (typically a TLB entry) back to a ram offset. */
2008 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2010 RAMBlock *block;
2011 ram_addr_t offset;
2013 block = qemu_ram_block_from_host(ptr, false, &offset);
2014 if (!block) {
2015 return RAM_ADDR_INVALID;
2018 return block->offset + offset;
2021 /* Called within RCU critical section. */
2022 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2023 uint64_t val, unsigned size)
2025 bool locked = false;
2027 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2028 locked = true;
2029 tb_lock();
2030 tb_invalidate_phys_page_fast(ram_addr, size);
2032 switch (size) {
2033 case 1:
2034 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2035 break;
2036 case 2:
2037 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2038 break;
2039 case 4:
2040 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2041 break;
2042 default:
2043 abort();
2046 if (locked) {
2047 tb_unlock();
2050 /* Set both VGA and migration bits for simplicity and to remove
2051 * the notdirty callback faster.
2053 cpu_physical_memory_set_dirty_range(ram_addr, size,
2054 DIRTY_CLIENTS_NOCODE);
2055 /* we remove the notdirty callback only if the code has been
2056 flushed */
2057 if (!cpu_physical_memory_is_clean(ram_addr)) {
2058 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2062 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2063 unsigned size, bool is_write)
2065 return is_write;
2068 static const MemoryRegionOps notdirty_mem_ops = {
2069 .write = notdirty_mem_write,
2070 .valid.accepts = notdirty_mem_accepts,
2071 .endianness = DEVICE_NATIVE_ENDIAN,
2074 /* Generate a debug exception if a watchpoint has been hit. */
2075 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2077 CPUState *cpu = current_cpu;
2078 CPUClass *cc = CPU_GET_CLASS(cpu);
2079 CPUArchState *env = cpu->env_ptr;
2080 target_ulong pc, cs_base;
2081 target_ulong vaddr;
2082 CPUWatchpoint *wp;
2083 uint32_t cpu_flags;
2085 if (cpu->watchpoint_hit) {
2086 /* We re-entered the check after replacing the TB. Now raise
2087 * the debug interrupt so that is will trigger after the
2088 * current instruction. */
2089 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2090 return;
2092 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2093 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2094 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2095 && (wp->flags & flags)) {
2096 if (flags == BP_MEM_READ) {
2097 wp->flags |= BP_WATCHPOINT_HIT_READ;
2098 } else {
2099 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2101 wp->hitaddr = vaddr;
2102 wp->hitattrs = attrs;
2103 if (!cpu->watchpoint_hit) {
2104 if (wp->flags & BP_CPU &&
2105 !cc->debug_check_watchpoint(cpu, wp)) {
2106 wp->flags &= ~BP_WATCHPOINT_HIT;
2107 continue;
2109 cpu->watchpoint_hit = wp;
2111 /* The tb_lock will be reset when cpu_loop_exit or
2112 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2113 * main loop.
2115 tb_lock();
2116 tb_check_watchpoint(cpu);
2117 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2118 cpu->exception_index = EXCP_DEBUG;
2119 cpu_loop_exit(cpu);
2120 } else {
2121 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2122 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2123 cpu_loop_exit_noexc(cpu);
2126 } else {
2127 wp->flags &= ~BP_WATCHPOINT_HIT;
2132 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2133 so these check for a hit then pass through to the normal out-of-line
2134 phys routines. */
2135 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2136 unsigned size, MemTxAttrs attrs)
2138 MemTxResult res;
2139 uint64_t data;
2140 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2141 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2143 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2144 switch (size) {
2145 case 1:
2146 data = address_space_ldub(as, addr, attrs, &res);
2147 break;
2148 case 2:
2149 data = address_space_lduw(as, addr, attrs, &res);
2150 break;
2151 case 4:
2152 data = address_space_ldl(as, addr, attrs, &res);
2153 break;
2154 default: abort();
2156 *pdata = data;
2157 return res;
2160 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2161 uint64_t val, unsigned size,
2162 MemTxAttrs attrs)
2164 MemTxResult res;
2165 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2166 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2168 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2169 switch (size) {
2170 case 1:
2171 address_space_stb(as, addr, val, attrs, &res);
2172 break;
2173 case 2:
2174 address_space_stw(as, addr, val, attrs, &res);
2175 break;
2176 case 4:
2177 address_space_stl(as, addr, val, attrs, &res);
2178 break;
2179 default: abort();
2181 return res;
2184 static const MemoryRegionOps watch_mem_ops = {
2185 .read_with_attrs = watch_mem_read,
2186 .write_with_attrs = watch_mem_write,
2187 .endianness = DEVICE_NATIVE_ENDIAN,
2190 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2191 unsigned len, MemTxAttrs attrs)
2193 subpage_t *subpage = opaque;
2194 uint8_t buf[8];
2195 MemTxResult res;
2197 #if defined(DEBUG_SUBPAGE)
2198 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2199 subpage, len, addr);
2200 #endif
2201 res = address_space_read(subpage->as, addr + subpage->base,
2202 attrs, buf, len);
2203 if (res) {
2204 return res;
2206 switch (len) {
2207 case 1:
2208 *data = ldub_p(buf);
2209 return MEMTX_OK;
2210 case 2:
2211 *data = lduw_p(buf);
2212 return MEMTX_OK;
2213 case 4:
2214 *data = ldl_p(buf);
2215 return MEMTX_OK;
2216 case 8:
2217 *data = ldq_p(buf);
2218 return MEMTX_OK;
2219 default:
2220 abort();
2224 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2225 uint64_t value, unsigned len, MemTxAttrs attrs)
2227 subpage_t *subpage = opaque;
2228 uint8_t buf[8];
2230 #if defined(DEBUG_SUBPAGE)
2231 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2232 " value %"PRIx64"\n",
2233 __func__, subpage, len, addr, value);
2234 #endif
2235 switch (len) {
2236 case 1:
2237 stb_p(buf, value);
2238 break;
2239 case 2:
2240 stw_p(buf, value);
2241 break;
2242 case 4:
2243 stl_p(buf, value);
2244 break;
2245 case 8:
2246 stq_p(buf, value);
2247 break;
2248 default:
2249 abort();
2251 return address_space_write(subpage->as, addr + subpage->base,
2252 attrs, buf, len);
2255 static bool subpage_accepts(void *opaque, hwaddr addr,
2256 unsigned len, bool is_write)
2258 subpage_t *subpage = opaque;
2259 #if defined(DEBUG_SUBPAGE)
2260 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2261 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2262 #endif
2264 return address_space_access_valid(subpage->as, addr + subpage->base,
2265 len, is_write);
2268 static const MemoryRegionOps subpage_ops = {
2269 .read_with_attrs = subpage_read,
2270 .write_with_attrs = subpage_write,
2271 .impl.min_access_size = 1,
2272 .impl.max_access_size = 8,
2273 .valid.min_access_size = 1,
2274 .valid.max_access_size = 8,
2275 .valid.accepts = subpage_accepts,
2276 .endianness = DEVICE_NATIVE_ENDIAN,
2279 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2280 uint16_t section)
2282 int idx, eidx;
2284 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2285 return -1;
2286 idx = SUBPAGE_IDX(start);
2287 eidx = SUBPAGE_IDX(end);
2288 #if defined(DEBUG_SUBPAGE)
2289 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2290 __func__, mmio, start, end, idx, eidx, section);
2291 #endif
2292 for (; idx <= eidx; idx++) {
2293 mmio->sub_section[idx] = section;
2296 return 0;
2299 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2301 subpage_t *mmio;
2303 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2304 mmio->as = as;
2305 mmio->base = base;
2306 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2307 NULL, TARGET_PAGE_SIZE);
2308 mmio->iomem.subpage = true;
2309 #if defined(DEBUG_SUBPAGE)
2310 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2311 mmio, base, TARGET_PAGE_SIZE);
2312 #endif
2313 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2315 return mmio;
2318 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2319 MemoryRegion *mr)
2321 assert(as);
2322 MemoryRegionSection section = {
2323 .address_space = as,
2324 .mr = mr,
2325 .offset_within_address_space = 0,
2326 .offset_within_region = 0,
2327 .size = int128_2_64(),
2330 return phys_section_add(map, &section);
2333 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2335 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2336 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2337 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2338 MemoryRegionSection *sections = d->map.sections;
2340 return sections[index & ~TARGET_PAGE_MASK].mr;
2343 static void io_mem_init(void)
2345 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2346 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2347 NULL, UINT64_MAX);
2348 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2349 NULL, UINT64_MAX);
2350 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2351 NULL, UINT64_MAX);
2354 static void mem_begin(MemoryListener *listener)
2356 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2357 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2358 uint16_t n;
2360 n = dummy_section(&d->map, as, &io_mem_unassigned);
2361 assert(n == PHYS_SECTION_UNASSIGNED);
2362 n = dummy_section(&d->map, as, &io_mem_notdirty);
2363 assert(n == PHYS_SECTION_NOTDIRTY);
2364 n = dummy_section(&d->map, as, &io_mem_rom);
2365 assert(n == PHYS_SECTION_ROM);
2366 n = dummy_section(&d->map, as, &io_mem_watch);
2367 assert(n == PHYS_SECTION_WATCH);
2369 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2370 d->as = as;
2371 as->next_dispatch = d;
2374 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2376 phys_sections_free(&d->map);
2377 g_free(d);
2380 static void mem_commit(MemoryListener *listener)
2382 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2383 AddressSpaceDispatch *cur = as->dispatch;
2384 AddressSpaceDispatch *next = as->next_dispatch;
2386 phys_page_compact_all(next, next->map.nodes_nb);
2388 atomic_rcu_set(&as->dispatch, next);
2389 if (cur) {
2390 call_rcu(cur, address_space_dispatch_free, rcu);
2394 static void tcg_commit(MemoryListener *listener)
2396 CPUAddressSpace *cpuas;
2397 AddressSpaceDispatch *d;
2399 /* since each CPU stores ram addresses in its TLB cache, we must
2400 reset the modified entries */
2401 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2402 cpu_reloading_memory_map();
2403 /* The CPU and TLB are protected by the iothread lock.
2404 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2405 * may have split the RCU critical section.
2407 d = atomic_rcu_read(&cpuas->as->dispatch);
2408 atomic_rcu_set(&cpuas->memory_dispatch, d);
2409 tlb_flush(cpuas->cpu, 1);
2412 void address_space_init_dispatch(AddressSpace *as)
2414 as->dispatch = NULL;
2415 as->dispatch_listener = (MemoryListener) {
2416 .begin = mem_begin,
2417 .commit = mem_commit,
2418 .region_add = mem_add,
2419 .region_nop = mem_add,
2420 .priority = 0,
2422 memory_listener_register(&as->dispatch_listener, as);
2425 void address_space_unregister(AddressSpace *as)
2427 memory_listener_unregister(&as->dispatch_listener);
2430 void address_space_destroy_dispatch(AddressSpace *as)
2432 AddressSpaceDispatch *d = as->dispatch;
2434 atomic_rcu_set(&as->dispatch, NULL);
2435 if (d) {
2436 call_rcu(d, address_space_dispatch_free, rcu);
2440 static void memory_map_init(void)
2442 system_memory = g_malloc(sizeof(*system_memory));
2444 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2445 address_space_init(&address_space_memory, system_memory, "memory");
2447 system_io = g_malloc(sizeof(*system_io));
2448 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2449 65536);
2450 address_space_init(&address_space_io, system_io, "I/O");
2453 MemoryRegion *get_system_memory(void)
2455 return system_memory;
2458 MemoryRegion *get_system_io(void)
2460 return system_io;
2463 #endif /* !defined(CONFIG_USER_ONLY) */
2465 /* physical memory access (slow version, mainly for debug) */
2466 #if defined(CONFIG_USER_ONLY)
2467 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2468 uint8_t *buf, int len, int is_write)
2470 int l, flags;
2471 target_ulong page;
2472 void * p;
2474 while (len > 0) {
2475 page = addr & TARGET_PAGE_MASK;
2476 l = (page + TARGET_PAGE_SIZE) - addr;
2477 if (l > len)
2478 l = len;
2479 flags = page_get_flags(page);
2480 if (!(flags & PAGE_VALID))
2481 return -1;
2482 if (is_write) {
2483 if (!(flags & PAGE_WRITE))
2484 return -1;
2485 /* XXX: this code should not depend on lock_user */
2486 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2487 return -1;
2488 memcpy(p, buf, l);
2489 unlock_user(p, addr, l);
2490 } else {
2491 if (!(flags & PAGE_READ))
2492 return -1;
2493 /* XXX: this code should not depend on lock_user */
2494 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2495 return -1;
2496 memcpy(buf, p, l);
2497 unlock_user(p, addr, 0);
2499 len -= l;
2500 buf += l;
2501 addr += l;
2503 return 0;
2506 #else
2508 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2509 hwaddr length)
2511 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2512 addr += memory_region_get_ram_addr(mr);
2514 /* No early return if dirty_log_mask is or becomes 0, because
2515 * cpu_physical_memory_set_dirty_range will still call
2516 * xen_modified_memory.
2518 if (dirty_log_mask) {
2519 dirty_log_mask =
2520 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2522 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2523 tb_lock();
2524 tb_invalidate_phys_range(addr, addr + length);
2525 tb_unlock();
2526 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2528 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2531 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2533 unsigned access_size_max = mr->ops->valid.max_access_size;
2535 /* Regions are assumed to support 1-4 byte accesses unless
2536 otherwise specified. */
2537 if (access_size_max == 0) {
2538 access_size_max = 4;
2541 /* Bound the maximum access by the alignment of the address. */
2542 if (!mr->ops->impl.unaligned) {
2543 unsigned align_size_max = addr & -addr;
2544 if (align_size_max != 0 && align_size_max < access_size_max) {
2545 access_size_max = align_size_max;
2549 /* Don't attempt accesses larger than the maximum. */
2550 if (l > access_size_max) {
2551 l = access_size_max;
2553 l = pow2floor(l);
2555 return l;
2558 static bool prepare_mmio_access(MemoryRegion *mr)
2560 bool unlocked = !qemu_mutex_iothread_locked();
2561 bool release_lock = false;
2563 if (unlocked && mr->global_locking) {
2564 qemu_mutex_lock_iothread();
2565 unlocked = false;
2566 release_lock = true;
2568 if (mr->flush_coalesced_mmio) {
2569 if (unlocked) {
2570 qemu_mutex_lock_iothread();
2572 qemu_flush_coalesced_mmio_buffer();
2573 if (unlocked) {
2574 qemu_mutex_unlock_iothread();
2578 return release_lock;
2581 /* Called within RCU critical section. */
2582 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2583 MemTxAttrs attrs,
2584 const uint8_t *buf,
2585 int len, hwaddr addr1,
2586 hwaddr l, MemoryRegion *mr)
2588 uint8_t *ptr;
2589 uint64_t val;
2590 MemTxResult result = MEMTX_OK;
2591 bool release_lock = false;
2593 for (;;) {
2594 if (!memory_access_is_direct(mr, true)) {
2595 release_lock |= prepare_mmio_access(mr);
2596 l = memory_access_size(mr, l, addr1);
2597 /* XXX: could force current_cpu to NULL to avoid
2598 potential bugs */
2599 switch (l) {
2600 case 8:
2601 /* 64 bit write access */
2602 val = ldq_p(buf);
2603 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2604 attrs);
2605 break;
2606 case 4:
2607 /* 32 bit write access */
2608 val = ldl_p(buf);
2609 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2610 attrs);
2611 break;
2612 case 2:
2613 /* 16 bit write access */
2614 val = lduw_p(buf);
2615 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2616 attrs);
2617 break;
2618 case 1:
2619 /* 8 bit write access */
2620 val = ldub_p(buf);
2621 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2622 attrs);
2623 break;
2624 default:
2625 abort();
2627 } else {
2628 /* RAM case */
2629 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2630 memcpy(ptr, buf, l);
2631 invalidate_and_set_dirty(mr, addr1, l);
2634 if (release_lock) {
2635 qemu_mutex_unlock_iothread();
2636 release_lock = false;
2639 len -= l;
2640 buf += l;
2641 addr += l;
2643 if (!len) {
2644 break;
2647 l = len;
2648 mr = address_space_translate(as, addr, &addr1, &l, true);
2651 return result;
2654 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2655 const uint8_t *buf, int len)
2657 hwaddr l;
2658 hwaddr addr1;
2659 MemoryRegion *mr;
2660 MemTxResult result = MEMTX_OK;
2662 if (len > 0) {
2663 rcu_read_lock();
2664 l = len;
2665 mr = address_space_translate(as, addr, &addr1, &l, true);
2666 result = address_space_write_continue(as, addr, attrs, buf, len,
2667 addr1, l, mr);
2668 rcu_read_unlock();
2671 return result;
2674 /* Called within RCU critical section. */
2675 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2676 MemTxAttrs attrs, uint8_t *buf,
2677 int len, hwaddr addr1, hwaddr l,
2678 MemoryRegion *mr)
2680 uint8_t *ptr;
2681 uint64_t val;
2682 MemTxResult result = MEMTX_OK;
2683 bool release_lock = false;
2685 for (;;) {
2686 if (!memory_access_is_direct(mr, false)) {
2687 /* I/O case */
2688 release_lock |= prepare_mmio_access(mr);
2689 l = memory_access_size(mr, l, addr1);
2690 switch (l) {
2691 case 8:
2692 /* 64 bit read access */
2693 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2694 attrs);
2695 stq_p(buf, val);
2696 break;
2697 case 4:
2698 /* 32 bit read access */
2699 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2700 attrs);
2701 stl_p(buf, val);
2702 break;
2703 case 2:
2704 /* 16 bit read access */
2705 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2706 attrs);
2707 stw_p(buf, val);
2708 break;
2709 case 1:
2710 /* 8 bit read access */
2711 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2712 attrs);
2713 stb_p(buf, val);
2714 break;
2715 default:
2716 abort();
2718 } else {
2719 /* RAM case */
2720 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2721 memcpy(buf, ptr, l);
2724 if (release_lock) {
2725 qemu_mutex_unlock_iothread();
2726 release_lock = false;
2729 len -= l;
2730 buf += l;
2731 addr += l;
2733 if (!len) {
2734 break;
2737 l = len;
2738 mr = address_space_translate(as, addr, &addr1, &l, false);
2741 return result;
2744 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2745 MemTxAttrs attrs, uint8_t *buf, int len)
2747 hwaddr l;
2748 hwaddr addr1;
2749 MemoryRegion *mr;
2750 MemTxResult result = MEMTX_OK;
2752 if (len > 0) {
2753 rcu_read_lock();
2754 l = len;
2755 mr = address_space_translate(as, addr, &addr1, &l, false);
2756 result = address_space_read_continue(as, addr, attrs, buf, len,
2757 addr1, l, mr);
2758 rcu_read_unlock();
2761 return result;
2764 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2765 uint8_t *buf, int len, bool is_write)
2767 if (is_write) {
2768 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2769 } else {
2770 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2774 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2775 int len, int is_write)
2777 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2778 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into the RAM/ROM range */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2786 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2787 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2789 hwaddr l;
2790 uint8_t *ptr;
2791 hwaddr addr1;
2792 MemoryRegion *mr;
2794 rcu_read_lock();
2795 while (len > 0) {
2796 l = len;
2797 mr = address_space_translate(as, addr, &addr1, &l, true);
2799 if (!(memory_region_is_ram(mr) ||
2800 memory_region_is_romd(mr))) {
2801 l = memory_access_size(mr, l, addr1);
2802 } else {
2803 /* ROM/RAM case */
2804 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2805 switch (type) {
2806 case WRITE_DATA:
2807 memcpy(ptr, buf, l);
2808 invalidate_and_set_dirty(mr, addr1, l);
2809 break;
2810 case FLUSH_CACHE:
2811 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2812 break;
2815 len -= l;
2816 buf += l;
2817 addr += l;
2819 rcu_read_unlock();
2822 /* used for ROM loading : can write in RAM and ROM */
2823 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2824 const uint8_t *buf, int len)
2826 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2829 void cpu_flush_icache_range(hwaddr start, int len)
2832 * This function should do the same thing as an icache flush that was
2833 * triggered from within the guest. For TCG we are always cache coherent,
2834 * so there is no need to flush anything. For KVM / Xen we need to flush
2835 * the host's instruction cache at least.
2837 if (tcg_enabled()) {
2838 return;
2841 cpu_physical_memory_write_rom_internal(&address_space_memory,
2842 start, NULL, len, FLUSH_CACHE);
2845 typedef struct {
2846 MemoryRegion *mr;
2847 void *buffer;
2848 hwaddr addr;
2849 hwaddr len;
2850 bool in_use;
2851 } BounceBuffer;
2853 static BounceBuffer bounce;
2855 typedef struct MapClient {
2856 QEMUBH *bh;
2857 QLIST_ENTRY(MapClient) link;
2858 } MapClient;
2860 QemuMutex map_client_list_lock;
2861 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2862 = QLIST_HEAD_INITIALIZER(map_client_list);
2864 static void cpu_unregister_map_client_do(MapClient *client)
2866 QLIST_REMOVE(client, link);
2867 g_free(client);
2870 static void cpu_notify_map_clients_locked(void)
2872 MapClient *client;
2874 while (!QLIST_EMPTY(&map_client_list)) {
2875 client = QLIST_FIRST(&map_client_list);
2876 qemu_bh_schedule(client->bh);
2877 cpu_unregister_map_client_do(client);
2881 void cpu_register_map_client(QEMUBH *bh)
2883 MapClient *client = g_malloc(sizeof(*client));
2885 qemu_mutex_lock(&map_client_list_lock);
2886 client->bh = bh;
2887 QLIST_INSERT_HEAD(&map_client_list, client, link);
2888 if (!atomic_read(&bounce.in_use)) {
2889 cpu_notify_map_clients_locked();
2891 qemu_mutex_unlock(&map_client_list_lock);
2894 void cpu_exec_init_all(void)
2896 qemu_mutex_init(&ram_list.mutex);
2897 /* The data structures we set up here depend on knowing the page size,
2898 * so no more changes can be made after this point.
2899 * In an ideal world, nothing we did before we had finished the
2900 * machine setup would care about the target page size, and we could
2901 * do this much later, rather than requiring board models to state
2902 * up front what their requirements are.
2904 finalize_target_page_bits();
2905 io_mem_init();
2906 memory_map_init();
2907 qemu_mutex_init(&map_client_list_lock);
2910 void cpu_unregister_map_client(QEMUBH *bh)
2912 MapClient *client;
2914 qemu_mutex_lock(&map_client_list_lock);
2915 QLIST_FOREACH(client, &map_client_list, link) {
2916 if (client->bh == bh) {
2917 cpu_unregister_map_client_do(client);
2918 break;
2921 qemu_mutex_unlock(&map_client_list_lock);
2924 static void cpu_notify_map_clients(void)
2926 qemu_mutex_lock(&map_client_list_lock);
2927 cpu_notify_map_clients_locked();
2928 qemu_mutex_unlock(&map_client_list_lock);
2931 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2933 MemoryRegion *mr;
2934 hwaddr l, xlat;
2936 rcu_read_lock();
2937 while (len > 0) {
2938 l = len;
2939 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2940 if (!memory_access_is_direct(mr, is_write)) {
2941 l = memory_access_size(mr, l, addr);
2942 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2943 return false;
2947 len -= l;
2948 addr += l;
2950 rcu_read_unlock();
2951 return true;
2954 /* Map a physical memory region into a host virtual address.
2955 * May map a subset of the requested range, given by and returned in *plen.
2956 * May return NULL if resources needed to perform the mapping are exhausted.
2957 * Use only for reads OR writes - not for read-modify-write operations.
2958 * Use cpu_register_map_client() to know when retrying the map operation is
2959 * likely to succeed.
2961 void *address_space_map(AddressSpace *as,
2962 hwaddr addr,
2963 hwaddr *plen,
2964 bool is_write)
2966 hwaddr len = *plen;
2967 hwaddr done = 0;
2968 hwaddr l, xlat, base;
2969 MemoryRegion *mr, *this_mr;
2970 void *ptr;
2972 if (len == 0) {
2973 return NULL;
2976 l = len;
2977 rcu_read_lock();
2978 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2980 if (!memory_access_is_direct(mr, is_write)) {
2981 if (atomic_xchg(&bounce.in_use, true)) {
2982 rcu_read_unlock();
2983 return NULL;
2985 /* Avoid unbounded allocations */
2986 l = MIN(l, TARGET_PAGE_SIZE);
2987 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2988 bounce.addr = addr;
2989 bounce.len = l;
2991 memory_region_ref(mr);
2992 bounce.mr = mr;
2993 if (!is_write) {
2994 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2995 bounce.buffer, l);
2998 rcu_read_unlock();
2999 *plen = l;
3000 return bounce.buffer;
3003 base = xlat;
3005 for (;;) {
3006 len -= l;
3007 addr += l;
3008 done += l;
3009 if (len == 0) {
3010 break;
3013 l = len;
3014 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3015 if (this_mr != mr || xlat != base + done) {
3016 break;
3020 memory_region_ref(mr);
3021 *plen = done;
3022 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
3023 rcu_read_unlock();
3025 return ptr;
3028 /* Unmaps a memory region previously mapped by address_space_map().
3029 * Will also mark the memory as dirty if is_write == 1. access_len gives
3030 * the amount of memory that was actually read or written by the caller.
3032 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3033 int is_write, hwaddr access_len)
3035 if (buffer != bounce.buffer) {
3036 MemoryRegion *mr;
3037 ram_addr_t addr1;
3039 mr = memory_region_from_host(buffer, &addr1);
3040 assert(mr != NULL);
3041 if (is_write) {
3042 invalidate_and_set_dirty(mr, addr1, access_len);
3044 if (xen_enabled()) {
3045 xen_invalidate_map_cache_entry(buffer);
3047 memory_region_unref(mr);
3048 return;
3050 if (is_write) {
3051 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3052 bounce.buffer, access_len);
3054 qemu_vfree(bounce.buffer);
3055 bounce.buffer = NULL;
3056 memory_region_unref(bounce.mr);
3057 atomic_mb_set(&bounce.in_use, false);
3058 cpu_notify_map_clients();
3061 void *cpu_physical_memory_map(hwaddr addr,
3062 hwaddr *plen,
3063 int is_write)
3065 return address_space_map(&address_space_memory, addr, plen, is_write);
3068 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3069 int is_write, hwaddr access_len)
3071 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3074 /* warning: addr must be aligned */
3075 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3076 MemTxAttrs attrs,
3077 MemTxResult *result,
3078 enum device_endian endian)
3080 uint8_t *ptr;
3081 uint64_t val;
3082 MemoryRegion *mr;
3083 hwaddr l = 4;
3084 hwaddr addr1;
3085 MemTxResult r;
3086 bool release_lock = false;
3088 rcu_read_lock();
3089 mr = address_space_translate(as, addr, &addr1, &l, false);
3090 if (l < 4 || !memory_access_is_direct(mr, false)) {
3091 release_lock |= prepare_mmio_access(mr);
3093 /* I/O case */
3094 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3095 #if defined(TARGET_WORDS_BIGENDIAN)
3096 if (endian == DEVICE_LITTLE_ENDIAN) {
3097 val = bswap32(val);
3099 #else
3100 if (endian == DEVICE_BIG_ENDIAN) {
3101 val = bswap32(val);
3103 #endif
3104 } else {
3105 /* RAM case */
3106 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3107 switch (endian) {
3108 case DEVICE_LITTLE_ENDIAN:
3109 val = ldl_le_p(ptr);
3110 break;
3111 case DEVICE_BIG_ENDIAN:
3112 val = ldl_be_p(ptr);
3113 break;
3114 default:
3115 val = ldl_p(ptr);
3116 break;
3118 r = MEMTX_OK;
3120 if (result) {
3121 *result = r;
3123 if (release_lock) {
3124 qemu_mutex_unlock_iothread();
3126 rcu_read_unlock();
3127 return val;
3130 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3131 MemTxAttrs attrs, MemTxResult *result)
3133 return address_space_ldl_internal(as, addr, attrs, result,
3134 DEVICE_NATIVE_ENDIAN);
3137 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3138 MemTxAttrs attrs, MemTxResult *result)
3140 return address_space_ldl_internal(as, addr, attrs, result,
3141 DEVICE_LITTLE_ENDIAN);
3144 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3145 MemTxAttrs attrs, MemTxResult *result)
3147 return address_space_ldl_internal(as, addr, attrs, result,
3148 DEVICE_BIG_ENDIAN);
3151 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3153 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3156 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3158 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3161 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3163 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3166 /* warning: addr must be aligned */
3167 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3168 MemTxAttrs attrs,
3169 MemTxResult *result,
3170 enum device_endian endian)
3172 uint8_t *ptr;
3173 uint64_t val;
3174 MemoryRegion *mr;
3175 hwaddr l = 8;
3176 hwaddr addr1;
3177 MemTxResult r;
3178 bool release_lock = false;
3180 rcu_read_lock();
3181 mr = address_space_translate(as, addr, &addr1, &l,
3182 false);
3183 if (l < 8 || !memory_access_is_direct(mr, false)) {
3184 release_lock |= prepare_mmio_access(mr);
3186 /* I/O case */
3187 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3188 #if defined(TARGET_WORDS_BIGENDIAN)
3189 if (endian == DEVICE_LITTLE_ENDIAN) {
3190 val = bswap64(val);
3192 #else
3193 if (endian == DEVICE_BIG_ENDIAN) {
3194 val = bswap64(val);
3196 #endif
3197 } else {
3198 /* RAM case */
3199 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3200 switch (endian) {
3201 case DEVICE_LITTLE_ENDIAN:
3202 val = ldq_le_p(ptr);
3203 break;
3204 case DEVICE_BIG_ENDIAN:
3205 val = ldq_be_p(ptr);
3206 break;
3207 default:
3208 val = ldq_p(ptr);
3209 break;
3211 r = MEMTX_OK;
3213 if (result) {
3214 *result = r;
3216 if (release_lock) {
3217 qemu_mutex_unlock_iothread();
3219 rcu_read_unlock();
3220 return val;
3223 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3224 MemTxAttrs attrs, MemTxResult *result)
3226 return address_space_ldq_internal(as, addr, attrs, result,
3227 DEVICE_NATIVE_ENDIAN);
3230 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3231 MemTxAttrs attrs, MemTxResult *result)
3233 return address_space_ldq_internal(as, addr, attrs, result,
3234 DEVICE_LITTLE_ENDIAN);
3237 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3238 MemTxAttrs attrs, MemTxResult *result)
3240 return address_space_ldq_internal(as, addr, attrs, result,
3241 DEVICE_BIG_ENDIAN);
3244 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3246 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3249 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3251 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3254 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3256 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3259 /* XXX: optimize */
3260 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3261 MemTxAttrs attrs, MemTxResult *result)
3263 uint8_t val;
3264 MemTxResult r;
3266 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3267 if (result) {
3268 *result = r;
3270 return val;
3273 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3275 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3278 /* warning: addr must be aligned */
3279 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3280 hwaddr addr,
3281 MemTxAttrs attrs,
3282 MemTxResult *result,
3283 enum device_endian endian)
3285 uint8_t *ptr;
3286 uint64_t val;
3287 MemoryRegion *mr;
3288 hwaddr l = 2;
3289 hwaddr addr1;
3290 MemTxResult r;
3291 bool release_lock = false;
3293 rcu_read_lock();
3294 mr = address_space_translate(as, addr, &addr1, &l,
3295 false);
3296 if (l < 2 || !memory_access_is_direct(mr, false)) {
3297 release_lock |= prepare_mmio_access(mr);
3299 /* I/O case */
3300 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3301 #if defined(TARGET_WORDS_BIGENDIAN)
3302 if (endian == DEVICE_LITTLE_ENDIAN) {
3303 val = bswap16(val);
3305 #else
3306 if (endian == DEVICE_BIG_ENDIAN) {
3307 val = bswap16(val);
3309 #endif
3310 } else {
3311 /* RAM case */
3312 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3313 switch (endian) {
3314 case DEVICE_LITTLE_ENDIAN:
3315 val = lduw_le_p(ptr);
3316 break;
3317 case DEVICE_BIG_ENDIAN:
3318 val = lduw_be_p(ptr);
3319 break;
3320 default:
3321 val = lduw_p(ptr);
3322 break;
3324 r = MEMTX_OK;
3326 if (result) {
3327 *result = r;
3329 if (release_lock) {
3330 qemu_mutex_unlock_iothread();
3332 rcu_read_unlock();
3333 return val;
3336 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3337 MemTxAttrs attrs, MemTxResult *result)
3339 return address_space_lduw_internal(as, addr, attrs, result,
3340 DEVICE_NATIVE_ENDIAN);
3343 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3344 MemTxAttrs attrs, MemTxResult *result)
3346 return address_space_lduw_internal(as, addr, attrs, result,
3347 DEVICE_LITTLE_ENDIAN);
3350 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3351 MemTxAttrs attrs, MemTxResult *result)
3353 return address_space_lduw_internal(as, addr, attrs, result,
3354 DEVICE_BIG_ENDIAN);
3357 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3359 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3362 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3364 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3367 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3369 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3372 /* warning: addr must be aligned. The ram page is not masked as dirty
3373 and the code inside is not invalidated. It is useful if the dirty
3374 bits are used to track modified PTEs */
3375 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3376 MemTxAttrs attrs, MemTxResult *result)
3378 uint8_t *ptr;
3379 MemoryRegion *mr;
3380 hwaddr l = 4;
3381 hwaddr addr1;
3382 MemTxResult r;
3383 uint8_t dirty_log_mask;
3384 bool release_lock = false;
3386 rcu_read_lock();
3387 mr = address_space_translate(as, addr, &addr1, &l,
3388 true);
3389 if (l < 4 || !memory_access_is_direct(mr, true)) {
3390 release_lock |= prepare_mmio_access(mr);
3392 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3393 } else {
3394 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3395 stl_p(ptr, val);
3397 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3398 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3399 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3400 4, dirty_log_mask);
3401 r = MEMTX_OK;
3403 if (result) {
3404 *result = r;
3406 if (release_lock) {
3407 qemu_mutex_unlock_iothread();
3409 rcu_read_unlock();
3412 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3414 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3417 /* warning: addr must be aligned */
3418 static inline void address_space_stl_internal(AddressSpace *as,
3419 hwaddr addr, uint32_t val,
3420 MemTxAttrs attrs,
3421 MemTxResult *result,
3422 enum device_endian endian)
3424 uint8_t *ptr;
3425 MemoryRegion *mr;
3426 hwaddr l = 4;
3427 hwaddr addr1;
3428 MemTxResult r;
3429 bool release_lock = false;
3431 rcu_read_lock();
3432 mr = address_space_translate(as, addr, &addr1, &l,
3433 true);
3434 if (l < 4 || !memory_access_is_direct(mr, true)) {
3435 release_lock |= prepare_mmio_access(mr);
3437 #if defined(TARGET_WORDS_BIGENDIAN)
3438 if (endian == DEVICE_LITTLE_ENDIAN) {
3439 val = bswap32(val);
3441 #else
3442 if (endian == DEVICE_BIG_ENDIAN) {
3443 val = bswap32(val);
3445 #endif
3446 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3447 } else {
3448 /* RAM case */
3449 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3450 switch (endian) {
3451 case DEVICE_LITTLE_ENDIAN:
3452 stl_le_p(ptr, val);
3453 break;
3454 case DEVICE_BIG_ENDIAN:
3455 stl_be_p(ptr, val);
3456 break;
3457 default:
3458 stl_p(ptr, val);
3459 break;
3461 invalidate_and_set_dirty(mr, addr1, 4);
3462 r = MEMTX_OK;
3464 if (result) {
3465 *result = r;
3467 if (release_lock) {
3468 qemu_mutex_unlock_iothread();
3470 rcu_read_unlock();
3473 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3474 MemTxAttrs attrs, MemTxResult *result)
3476 address_space_stl_internal(as, addr, val, attrs, result,
3477 DEVICE_NATIVE_ENDIAN);
3480 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3481 MemTxAttrs attrs, MemTxResult *result)
3483 address_space_stl_internal(as, addr, val, attrs, result,
3484 DEVICE_LITTLE_ENDIAN);
3487 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3488 MemTxAttrs attrs, MemTxResult *result)
3490 address_space_stl_internal(as, addr, val, attrs, result,
3491 DEVICE_BIG_ENDIAN);
3494 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3496 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3499 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3501 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3504 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3506 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3509 /* XXX: optimize */
3510 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3511 MemTxAttrs attrs, MemTxResult *result)
3513 uint8_t v = val;
3514 MemTxResult r;
3516 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3517 if (result) {
3518 *result = r;
3522 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3524 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3527 /* warning: addr must be aligned */
3528 static inline void address_space_stw_internal(AddressSpace *as,
3529 hwaddr addr, uint32_t val,
3530 MemTxAttrs attrs,
3531 MemTxResult *result,
3532 enum device_endian endian)
3534 uint8_t *ptr;
3535 MemoryRegion *mr;
3536 hwaddr l = 2;
3537 hwaddr addr1;
3538 MemTxResult r;
3539 bool release_lock = false;
3541 rcu_read_lock();
3542 mr = address_space_translate(as, addr, &addr1, &l, true);
3543 if (l < 2 || !memory_access_is_direct(mr, true)) {
3544 release_lock |= prepare_mmio_access(mr);
3546 #if defined(TARGET_WORDS_BIGENDIAN)
3547 if (endian == DEVICE_LITTLE_ENDIAN) {
3548 val = bswap16(val);
3550 #else
3551 if (endian == DEVICE_BIG_ENDIAN) {
3552 val = bswap16(val);
3554 #endif
3555 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3556 } else {
3557 /* RAM case */
3558 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3559 switch (endian) {
3560 case DEVICE_LITTLE_ENDIAN:
3561 stw_le_p(ptr, val);
3562 break;
3563 case DEVICE_BIG_ENDIAN:
3564 stw_be_p(ptr, val);
3565 break;
3566 default:
3567 stw_p(ptr, val);
3568 break;
3570 invalidate_and_set_dirty(mr, addr1, 2);
3571 r = MEMTX_OK;
3573 if (result) {
3574 *result = r;
3576 if (release_lock) {
3577 qemu_mutex_unlock_iothread();
3579 rcu_read_unlock();
3582 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3583 MemTxAttrs attrs, MemTxResult *result)
3585 address_space_stw_internal(as, addr, val, attrs, result,
3586 DEVICE_NATIVE_ENDIAN);
3589 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3590 MemTxAttrs attrs, MemTxResult *result)
3592 address_space_stw_internal(as, addr, val, attrs, result,
3593 DEVICE_LITTLE_ENDIAN);
3596 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3597 MemTxAttrs attrs, MemTxResult *result)
3599 address_space_stw_internal(as, addr, val, attrs, result,
3600 DEVICE_BIG_ENDIAN);
3603 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3605 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3608 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3610 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3613 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3615 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3618 /* XXX: optimize */
3619 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3620 MemTxAttrs attrs, MemTxResult *result)
3622 MemTxResult r;
3623 val = tswap64(val);
3624 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3625 if (result) {
3626 *result = r;
3630 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3631 MemTxAttrs attrs, MemTxResult *result)
3633 MemTxResult r;
3634 val = cpu_to_le64(val);
3635 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3636 if (result) {
3637 *result = r;
3640 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3641 MemTxAttrs attrs, MemTxResult *result)
3643 MemTxResult r;
3644 val = cpu_to_be64(val);
3645 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3646 if (result) {
3647 *result = r;
3651 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3653 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3656 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3658 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3661 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3663 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3666 /* virtual memory access for debug (includes writing to ROM) */
3667 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3668 uint8_t *buf, int len, int is_write)
3670 int l;
3671 hwaddr phys_addr;
3672 target_ulong page;
3674 while (len > 0) {
3675 int asidx;
3676 MemTxAttrs attrs;
3678 page = addr & TARGET_PAGE_MASK;
3679 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3680 asidx = cpu_asidx_from_attrs(cpu, attrs);
3681 /* if no physical page mapped, return an error */
3682 if (phys_addr == -1)
3683 return -1;
3684 l = (page + TARGET_PAGE_SIZE) - addr;
3685 if (l > len)
3686 l = len;
3687 phys_addr += (addr & ~TARGET_PAGE_MASK);
3688 if (is_write) {
3689 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3690 phys_addr, buf, l);
3691 } else {
3692 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3693 MEMTXATTRS_UNSPECIFIED,
3694 buf, l, 0);
3696 len -= l;
3697 buf += l;
3698 addr += l;
3700 return 0;
3704 * Allows code that needs to deal with migration bitmaps etc to still be built
3705 * target independent.
3707 size_t qemu_target_page_bits(void)
3709 return TARGET_PAGE_BITS;
3712 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3728 #ifndef CONFIG_USER_ONLY
3729 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3731 MemoryRegion*mr;
3732 hwaddr l = 1;
3733 bool res;
3735 rcu_read_lock();
3736 mr = address_space_translate(&address_space_memory,
3737 phys_addr, &phys_addr, &l, false);
3739 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3740 rcu_read_unlock();
3741 return res;
3744 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3746 RAMBlock *block;
3747 int ret = 0;
3749 rcu_read_lock();
3750 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3751 ret = func(block->idstr, block->host, block->offset,
3752 block->used_length, opaque);
3753 if (ret) {
3754 break;
3757 rcu_read_unlock();
3758 return ret;
3760 #endif