vhost-user: minor simplification
[qemu/kevin.git] / exec.c
blob60cf46a5b51cf9b8127d0b1747349be52eddde7d
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One entry of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
121 #define P_L2_BITS 9
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 MemoryRegionSection *mru_section;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
147 PhysPageMap map;
148 AddressSpace *as;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
153 MemoryRegion iomem;
154 AddressSpace *as;
155 hwaddr base;
156 uint16_t sub_section[TARGET_PAGE_SIZE];
157 } subpage_t;
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
178 CPUState *cpu;
179 AddressSpace *as;
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
184 #endif
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 static unsigned alloc_hint = 16;
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 alloc_hint = map->nodes_nb_alloc;
199 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
201 unsigned i;
202 uint32_t ret;
203 PhysPageEntry e;
204 PhysPageEntry *p;
206 ret = map->nodes_nb++;
207 p = map->nodes[ret];
208 assert(ret != PHYS_MAP_NODE_NIL);
209 assert(ret != map->nodes_nb_alloc);
211 e.skip = leaf ? 0 : 1;
212 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
213 for (i = 0; i < P_L2_SIZE; ++i) {
214 memcpy(&p[i], &e, sizeof(e));
216 return ret;
219 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
220 hwaddr *index, hwaddr *nb, uint16_t leaf,
221 int level)
223 PhysPageEntry *p;
224 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
226 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
227 lp->ptr = phys_map_node_alloc(map, level == 0);
229 p = map->nodes[lp->ptr];
230 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
232 while (*nb && lp < &p[P_L2_SIZE]) {
233 if ((*index & (step - 1)) == 0 && *nb >= step) {
234 lp->skip = 0;
235 lp->ptr = leaf;
236 *index += step;
237 *nb -= step;
238 } else {
239 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
241 ++lp;
245 static void phys_page_set(AddressSpaceDispatch *d,
246 hwaddr index, hwaddr nb,
247 uint16_t leaf)
249 /* Wildly overreserve - it doesn't matter much. */
250 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
252 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
255 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
256 * and update our entry so we can skip it and go directly to the destination.
258 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
260 unsigned valid_ptr = P_L2_SIZE;
261 int valid = 0;
262 PhysPageEntry *p;
263 int i;
265 if (lp->ptr == PHYS_MAP_NODE_NIL) {
266 return;
269 p = nodes[lp->ptr];
270 for (i = 0; i < P_L2_SIZE; i++) {
271 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
272 continue;
275 valid_ptr = i;
276 valid++;
277 if (p[i].skip) {
278 phys_page_compact(&p[i], nodes, compacted);
282 /* We can only compress if there's only one child. */
283 if (valid != 1) {
284 return;
287 assert(valid_ptr < P_L2_SIZE);
289 /* Don't compress if it won't fit in the # of bits we have. */
290 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
291 return;
294 lp->ptr = p[valid_ptr].ptr;
295 if (!p[valid_ptr].skip) {
296 /* If our only child is a leaf, make this a leaf. */
297 /* By design, we should have made this node a leaf to begin with so we
298 * should never reach here.
299 * But since it's so simple to handle this, let's do it just in case we
300 * change this rule.
302 lp->skip = 0;
303 } else {
304 lp->skip += p[valid_ptr].skip;
308 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
310 DECLARE_BITMAP(compacted, nodes_nb);
312 if (d->phys_map.skip) {
313 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
317 static inline bool section_covers_addr(const MemoryRegionSection *section,
318 hwaddr addr)
320 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
321 * the section must cover the entire address space.
323 return section->size.hi ||
324 range_covers_byte(section->offset_within_address_space,
325 section->size.lo, addr);
328 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
329 Node *nodes, MemoryRegionSection *sections)
331 PhysPageEntry *p;
332 hwaddr index = addr >> TARGET_PAGE_BITS;
333 int i;
335 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
336 if (lp.ptr == PHYS_MAP_NODE_NIL) {
337 return &sections[PHYS_SECTION_UNASSIGNED];
339 p = nodes[lp.ptr];
340 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
343 if (section_covers_addr(&sections[lp.ptr], addr)) {
344 return &sections[lp.ptr];
345 } else {
346 return &sections[PHYS_SECTION_UNASSIGNED];
350 bool memory_region_is_unassigned(MemoryRegion *mr)
352 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
353 && mr != &io_mem_watch;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
358 hwaddr addr,
359 bool resolve_subpage)
361 MemoryRegionSection *section = atomic_read(&d->mru_section);
362 subpage_t *subpage;
363 bool update;
365 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
366 section_covers_addr(section, addr)) {
367 update = false;
368 } else {
369 section = phys_page_find(d->phys_map, addr, d->map.nodes,
370 d->map.sections);
371 update = true;
373 if (resolve_subpage && section->mr->subpage) {
374 subpage = container_of(section->mr, subpage_t, iomem);
375 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
377 if (update) {
378 atomic_set(&d->mru_section, section);
380 return section;
383 /* Called from RCU critical section */
384 static MemoryRegionSection *
385 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
386 hwaddr *plen, bool resolve_subpage)
388 MemoryRegionSection *section;
389 MemoryRegion *mr;
390 Int128 diff;
392 section = address_space_lookup_region(d, addr, resolve_subpage);
393 /* Compute offset within MemoryRegionSection */
394 addr -= section->offset_within_address_space;
396 /* Compute offset within MemoryRegion */
397 *xlat = addr + section->offset_within_region;
399 mr = section->mr;
401 /* MMIO registers can be expected to perform full-width accesses based only
402 * on their address, without considering adjacent registers that could
403 * decode to completely different MemoryRegions. When such registers
404 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
405 * regions overlap wildly. For this reason we cannot clamp the accesses
406 * here.
408 * If the length is small (as is the case for address_space_ldl/stl),
409 * everything works fine. If the incoming length is large, however,
410 * the caller really has to do the clamping through memory_access_size.
412 if (memory_region_is_ram(mr)) {
413 diff = int128_sub(section->size, int128_make64(addr));
414 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
416 return section;
419 /* Called from RCU critical section */
420 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen,
422 bool is_write)
424 IOMMUTLBEntry iotlb;
425 MemoryRegionSection *section;
426 MemoryRegion *mr;
428 for (;;) {
429 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
430 section = address_space_translate_internal(d, addr, &addr, plen, true);
431 mr = section->mr;
433 if (!mr->iommu_ops) {
434 break;
437 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
438 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
439 | (addr & iotlb.addr_mask));
440 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
441 if (!(iotlb.perm & (1 << is_write))) {
442 mr = &io_mem_unassigned;
443 break;
446 as = iotlb.target_as;
449 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
450 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
451 *plen = MIN(page, *plen);
454 *xlat = addr;
455 return mr;
458 /* Called from RCU critical section */
459 MemoryRegionSection *
460 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
461 hwaddr *xlat, hwaddr *plen)
463 MemoryRegionSection *section;
464 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
466 section = address_space_translate_internal(d, addr, xlat, plen, false);
468 assert(!section->mr->iommu_ops);
469 return section;
471 #endif
473 #if !defined(CONFIG_USER_ONLY)
475 static int cpu_common_post_load(void *opaque, int version_id)
477 CPUState *cpu = opaque;
479 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
480 version_id is increased. */
481 cpu->interrupt_request &= ~0x01;
482 tlb_flush(cpu, 1);
484 return 0;
487 static int cpu_common_pre_load(void *opaque)
489 CPUState *cpu = opaque;
491 cpu->exception_index = -1;
493 return 0;
496 static bool cpu_common_exception_index_needed(void *opaque)
498 CPUState *cpu = opaque;
500 return tcg_enabled() && cpu->exception_index != -1;
503 static const VMStateDescription vmstate_cpu_common_exception_index = {
504 .name = "cpu_common/exception_index",
505 .version_id = 1,
506 .minimum_version_id = 1,
507 .needed = cpu_common_exception_index_needed,
508 .fields = (VMStateField[]) {
509 VMSTATE_INT32(exception_index, CPUState),
510 VMSTATE_END_OF_LIST()
514 static bool cpu_common_crash_occurred_needed(void *opaque)
516 CPUState *cpu = opaque;
518 return cpu->crash_occurred;
521 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
522 .name = "cpu_common/crash_occurred",
523 .version_id = 1,
524 .minimum_version_id = 1,
525 .needed = cpu_common_crash_occurred_needed,
526 .fields = (VMStateField[]) {
527 VMSTATE_BOOL(crash_occurred, CPUState),
528 VMSTATE_END_OF_LIST()
532 const VMStateDescription vmstate_cpu_common = {
533 .name = "cpu_common",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .pre_load = cpu_common_pre_load,
537 .post_load = cpu_common_post_load,
538 .fields = (VMStateField[]) {
539 VMSTATE_UINT32(halted, CPUState),
540 VMSTATE_UINT32(interrupt_request, CPUState),
541 VMSTATE_END_OF_LIST()
543 .subsections = (const VMStateDescription*[]) {
544 &vmstate_cpu_common_exception_index,
545 &vmstate_cpu_common_crash_occurred,
546 NULL
550 #endif
552 CPUState *qemu_get_cpu(int index)
554 CPUState *cpu;
556 CPU_FOREACH(cpu) {
557 if (cpu->cpu_index == index) {
558 return cpu;
562 return NULL;
565 #if !defined(CONFIG_USER_ONLY)
566 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
568 CPUAddressSpace *newas;
570 /* Target code should have set num_ases before calling us */
571 assert(asidx < cpu->num_ases);
573 if (asidx == 0) {
574 /* address space 0 gets the convenience alias */
575 cpu->as = as;
578 /* KVM cannot currently support multiple address spaces. */
579 assert(asidx == 0 || !kvm_enabled());
581 if (!cpu->cpu_ases) {
582 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
585 newas = &cpu->cpu_ases[asidx];
586 newas->cpu = cpu;
587 newas->as = as;
588 if (tcg_enabled()) {
589 newas->tcg_as_listener.commit = tcg_commit;
590 memory_listener_register(&newas->tcg_as_listener, as);
594 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
596 /* Return the AddressSpace corresponding to the specified index */
597 return cpu->cpu_ases[asidx].as;
599 #endif
601 #ifndef CONFIG_USER_ONLY
602 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
604 static int cpu_get_free_index(Error **errp)
606 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
608 if (cpu >= MAX_CPUMASK_BITS) {
609 error_setg(errp, "Trying to use more CPUs than max of %d",
610 MAX_CPUMASK_BITS);
611 return -1;
614 bitmap_set(cpu_index_map, cpu, 1);
615 return cpu;
618 static void cpu_release_index(CPUState *cpu)
620 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
622 #else
624 static int cpu_get_free_index(Error **errp)
626 CPUState *some_cpu;
627 int cpu_index = 0;
629 CPU_FOREACH(some_cpu) {
630 cpu_index++;
632 return cpu_index;
635 static void cpu_release_index(CPUState *cpu)
637 return;
639 #endif
641 void cpu_exec_exit(CPUState *cpu)
643 CPUClass *cc = CPU_GET_CLASS(cpu);
645 #if defined(CONFIG_USER_ONLY)
646 cpu_list_lock();
647 #endif
648 if (cpu->cpu_index == -1) {
649 /* cpu_index was never allocated by this @cpu or was already freed. */
650 #if defined(CONFIG_USER_ONLY)
651 cpu_list_unlock();
652 #endif
653 return;
656 QTAILQ_REMOVE(&cpus, cpu, node);
657 cpu_release_index(cpu);
658 cpu->cpu_index = -1;
659 #if defined(CONFIG_USER_ONLY)
660 cpu_list_unlock();
661 #endif
663 if (cc->vmsd != NULL) {
664 vmstate_unregister(NULL, cc->vmsd, cpu);
666 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
667 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
671 void cpu_exec_init(CPUState *cpu, Error **errp)
673 CPUClass *cc = CPU_GET_CLASS(cpu);
674 Error *local_err = NULL;
676 cpu->as = NULL;
677 cpu->num_ases = 0;
679 #ifndef CONFIG_USER_ONLY
680 cpu->thread_id = qemu_get_thread_id();
682 /* This is a softmmu CPU object, so create a property for it
683 * so users can wire up its memory. (This can't go in qom/cpu.c
684 * because that file is compiled only once for both user-mode
685 * and system builds.) The default if no link is set up is to use
686 * the system address space.
688 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
689 (Object **)&cpu->memory,
690 qdev_prop_allow_set_link_before_realize,
691 OBJ_PROP_LINK_UNREF_ON_RELEASE,
692 &error_abort);
693 cpu->memory = system_memory;
694 object_ref(OBJECT(cpu->memory));
695 #endif
697 #if defined(CONFIG_USER_ONLY)
698 cpu_list_lock();
699 #endif
700 cpu->cpu_index = cpu_get_free_index(&local_err);
701 if (local_err) {
702 error_propagate(errp, local_err);
703 #if defined(CONFIG_USER_ONLY)
704 cpu_list_unlock();
705 #endif
706 return;
708 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
709 #if defined(CONFIG_USER_ONLY)
710 (void) cc;
711 cpu_list_unlock();
712 #else
713 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
714 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
716 if (cc->vmsd != NULL) {
717 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
719 #endif
722 #if defined(CONFIG_USER_ONLY)
723 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
725 tb_invalidate_phys_page_range(pc, pc + 1, 0);
727 #else
728 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
730 MemTxAttrs attrs;
731 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
732 int asidx = cpu_asidx_from_attrs(cpu, attrs);
733 if (phys != -1) {
734 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
735 phys | (pc & ~TARGET_PAGE_MASK));
738 #endif
740 #if defined(CONFIG_USER_ONLY)
741 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
746 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
747 int flags)
749 return -ENOSYS;
752 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
756 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
757 int flags, CPUWatchpoint **watchpoint)
759 return -ENOSYS;
761 #else
762 /* Add a watchpoint. */
763 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
764 int flags, CPUWatchpoint **watchpoint)
766 CPUWatchpoint *wp;
768 /* forbid ranges which are empty or run off the end of the address space */
769 if (len == 0 || (addr + len - 1) < addr) {
770 error_report("tried to set invalid watchpoint at %"
771 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
772 return -EINVAL;
774 wp = g_malloc(sizeof(*wp));
776 wp->vaddr = addr;
777 wp->len = len;
778 wp->flags = flags;
780 /* keep all GDB-injected watchpoints in front */
781 if (flags & BP_GDB) {
782 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
783 } else {
784 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
787 tlb_flush_page(cpu, addr);
789 if (watchpoint)
790 *watchpoint = wp;
791 return 0;
794 /* Remove a specific watchpoint. */
795 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
796 int flags)
798 CPUWatchpoint *wp;
800 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
801 if (addr == wp->vaddr && len == wp->len
802 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
803 cpu_watchpoint_remove_by_ref(cpu, wp);
804 return 0;
807 return -ENOENT;
810 /* Remove a specific watchpoint by reference. */
811 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
813 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
815 tlb_flush_page(cpu, watchpoint->vaddr);
817 g_free(watchpoint);
820 /* Remove all matching watchpoints. */
821 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
823 CPUWatchpoint *wp, *next;
825 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
826 if (wp->flags & mask) {
827 cpu_watchpoint_remove_by_ref(cpu, wp);
832 /* Return true if this watchpoint address matches the specified
833 * access (ie the address range covered by the watchpoint overlaps
834 * partially or completely with the address range covered by the
835 * access).
837 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
838 vaddr addr,
839 vaddr len)
841 /* We know the lengths are non-zero, but a little caution is
842 * required to avoid errors in the case where the range ends
843 * exactly at the top of the address space and so addr + len
844 * wraps round to zero.
846 vaddr wpend = wp->vaddr + wp->len - 1;
847 vaddr addrend = addr + len - 1;
849 return !(addr > wpend || wp->vaddr > addrend);
852 #endif
854 /* Add a breakpoint. */
855 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
856 CPUBreakpoint **breakpoint)
858 CPUBreakpoint *bp;
860 bp = g_malloc(sizeof(*bp));
862 bp->pc = pc;
863 bp->flags = flags;
865 /* keep all GDB-injected breakpoints in front */
866 if (flags & BP_GDB) {
867 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
868 } else {
869 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
872 breakpoint_invalidate(cpu, pc);
874 if (breakpoint) {
875 *breakpoint = bp;
877 return 0;
880 /* Remove a specific breakpoint. */
881 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
883 CPUBreakpoint *bp;
885 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
886 if (bp->pc == pc && bp->flags == flags) {
887 cpu_breakpoint_remove_by_ref(cpu, bp);
888 return 0;
891 return -ENOENT;
894 /* Remove a specific breakpoint by reference. */
895 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
897 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
899 breakpoint_invalidate(cpu, breakpoint->pc);
901 g_free(breakpoint);
904 /* Remove all matching breakpoints. */
905 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
907 CPUBreakpoint *bp, *next;
909 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
910 if (bp->flags & mask) {
911 cpu_breakpoint_remove_by_ref(cpu, bp);
916 /* enable or disable single step mode. EXCP_DEBUG is returned by the
917 CPU loop after each instruction */
918 void cpu_single_step(CPUState *cpu, int enabled)
920 if (cpu->singlestep_enabled != enabled) {
921 cpu->singlestep_enabled = enabled;
922 if (kvm_enabled()) {
923 kvm_update_guest_debug(cpu, 0);
924 } else {
925 /* must flush all the translated code to avoid inconsistencies */
926 /* XXX: only flush what is necessary */
927 tb_flush(cpu);
932 void cpu_abort(CPUState *cpu, const char *fmt, ...)
934 va_list ap;
935 va_list ap2;
937 va_start(ap, fmt);
938 va_copy(ap2, ap);
939 fprintf(stderr, "qemu: fatal: ");
940 vfprintf(stderr, fmt, ap);
941 fprintf(stderr, "\n");
942 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
943 if (qemu_log_separate()) {
944 qemu_log("qemu: fatal: ");
945 qemu_log_vprintf(fmt, ap2);
946 qemu_log("\n");
947 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
948 qemu_log_flush();
949 qemu_log_close();
951 va_end(ap2);
952 va_end(ap);
953 replay_finish();
954 #if defined(CONFIG_USER_ONLY)
956 struct sigaction act;
957 sigfillset(&act.sa_mask);
958 act.sa_handler = SIG_DFL;
959 sigaction(SIGABRT, &act, NULL);
961 #endif
962 abort();
965 #if !defined(CONFIG_USER_ONLY)
966 /* Called from RCU critical section */
967 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
969 RAMBlock *block;
971 block = atomic_rcu_read(&ram_list.mru_block);
972 if (block && addr - block->offset < block->max_length) {
973 return block;
975 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
976 if (addr - block->offset < block->max_length) {
977 goto found;
981 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
982 abort();
984 found:
985 /* It is safe to write mru_block outside the iothread lock. This
986 * is what happens:
988 * mru_block = xxx
989 * rcu_read_unlock()
990 * xxx removed from list
991 * rcu_read_lock()
992 * read mru_block
993 * mru_block = NULL;
994 * call_rcu(reclaim_ramblock, xxx);
995 * rcu_read_unlock()
997 * atomic_rcu_set is not needed here. The block was already published
998 * when it was placed into the list. Here we're just making an extra
999 * copy of the pointer.
1001 ram_list.mru_block = block;
1002 return block;
1005 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
1007 CPUState *cpu;
1008 ram_addr_t start1;
1009 RAMBlock *block;
1010 ram_addr_t end;
1012 end = TARGET_PAGE_ALIGN(start + length);
1013 start &= TARGET_PAGE_MASK;
1015 rcu_read_lock();
1016 block = qemu_get_ram_block(start);
1017 assert(block == qemu_get_ram_block(end - 1));
1018 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
1019 CPU_FOREACH(cpu) {
1020 tlb_reset_dirty(cpu, start1, length);
1022 rcu_read_unlock();
1025 /* Note: start and end must be within the same ram block. */
1026 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1027 ram_addr_t length,
1028 unsigned client)
1030 DirtyMemoryBlocks *blocks;
1031 unsigned long end, page;
1032 bool dirty = false;
1034 if (length == 0) {
1035 return false;
1038 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1039 page = start >> TARGET_PAGE_BITS;
1041 rcu_read_lock();
1043 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1045 while (page < end) {
1046 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1047 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1048 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1050 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1051 offset, num);
1052 page += num;
1055 rcu_read_unlock();
1057 if (dirty && tcg_enabled()) {
1058 tlb_reset_dirty_range_all(start, length);
1061 return dirty;
1064 /* Called from RCU critical section */
1065 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1066 MemoryRegionSection *section,
1067 target_ulong vaddr,
1068 hwaddr paddr, hwaddr xlat,
1069 int prot,
1070 target_ulong *address)
1072 hwaddr iotlb;
1073 CPUWatchpoint *wp;
1075 if (memory_region_is_ram(section->mr)) {
1076 /* Normal RAM. */
1077 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1078 if (!section->readonly) {
1079 iotlb |= PHYS_SECTION_NOTDIRTY;
1080 } else {
1081 iotlb |= PHYS_SECTION_ROM;
1083 } else {
1084 AddressSpaceDispatch *d;
1086 d = atomic_rcu_read(&section->address_space->dispatch);
1087 iotlb = section - d->map.sections;
1088 iotlb += xlat;
1091 /* Make accesses to pages with watchpoints go via the
1092 watchpoint trap routines. */
1093 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1094 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1095 /* Avoid trapping reads of pages with a write breakpoint. */
1096 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1097 iotlb = PHYS_SECTION_WATCH + paddr;
1098 *address |= TLB_MMIO;
1099 break;
1104 return iotlb;
1106 #endif /* !defined(CONFIG_USER_ONLY) */
1108 #if !defined(CONFIG_USER_ONLY)
1110 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1111 uint16_t section);
1112 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1114 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1115 qemu_anon_ram_alloc;
1118 * Set a custom physical guest memory alloator.
1119 * Accelerators with unusual needs may need this. Hopefully, we can
1120 * get rid of it eventually.
1122 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1124 phys_mem_alloc = alloc;
1127 static uint16_t phys_section_add(PhysPageMap *map,
1128 MemoryRegionSection *section)
1130 /* The physical section number is ORed with a page-aligned
1131 * pointer to produce the iotlb entries. Thus it should
1132 * never overflow into the page-aligned value.
1134 assert(map->sections_nb < TARGET_PAGE_SIZE);
1136 if (map->sections_nb == map->sections_nb_alloc) {
1137 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1138 map->sections = g_renew(MemoryRegionSection, map->sections,
1139 map->sections_nb_alloc);
1141 map->sections[map->sections_nb] = *section;
1142 memory_region_ref(section->mr);
1143 return map->sections_nb++;
1146 static void phys_section_destroy(MemoryRegion *mr)
1148 bool have_sub_page = mr->subpage;
1150 memory_region_unref(mr);
1152 if (have_sub_page) {
1153 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1154 object_unref(OBJECT(&subpage->iomem));
1155 g_free(subpage);
1159 static void phys_sections_free(PhysPageMap *map)
1161 while (map->sections_nb > 0) {
1162 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1163 phys_section_destroy(section->mr);
1165 g_free(map->sections);
1166 g_free(map->nodes);
1169 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1171 subpage_t *subpage;
1172 hwaddr base = section->offset_within_address_space
1173 & TARGET_PAGE_MASK;
1174 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1175 d->map.nodes, d->map.sections);
1176 MemoryRegionSection subsection = {
1177 .offset_within_address_space = base,
1178 .size = int128_make64(TARGET_PAGE_SIZE),
1180 hwaddr start, end;
1182 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1184 if (!(existing->mr->subpage)) {
1185 subpage = subpage_init(d->as, base);
1186 subsection.address_space = d->as;
1187 subsection.mr = &subpage->iomem;
1188 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1189 phys_section_add(&d->map, &subsection));
1190 } else {
1191 subpage = container_of(existing->mr, subpage_t, iomem);
1193 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1194 end = start + int128_get64(section->size) - 1;
1195 subpage_register(subpage, start, end,
1196 phys_section_add(&d->map, section));
1200 static void register_multipage(AddressSpaceDispatch *d,
1201 MemoryRegionSection *section)
1203 hwaddr start_addr = section->offset_within_address_space;
1204 uint16_t section_index = phys_section_add(&d->map, section);
1205 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1206 TARGET_PAGE_BITS));
1208 assert(num_pages);
1209 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1212 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1214 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1215 AddressSpaceDispatch *d = as->next_dispatch;
1216 MemoryRegionSection now = *section, remain = *section;
1217 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1219 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1220 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1221 - now.offset_within_address_space;
1223 now.size = int128_min(int128_make64(left), now.size);
1224 register_subpage(d, &now);
1225 } else {
1226 now.size = int128_zero();
1228 while (int128_ne(remain.size, now.size)) {
1229 remain.size = int128_sub(remain.size, now.size);
1230 remain.offset_within_address_space += int128_get64(now.size);
1231 remain.offset_within_region += int128_get64(now.size);
1232 now = remain;
1233 if (int128_lt(remain.size, page_size)) {
1234 register_subpage(d, &now);
1235 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1236 now.size = page_size;
1237 register_subpage(d, &now);
1238 } else {
1239 now.size = int128_and(now.size, int128_neg(page_size));
1240 register_multipage(d, &now);
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is disabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1251 void qemu_mutex_lock_ramlist(void)
1253 qemu_mutex_lock(&ram_list.mutex);
1256 void qemu_mutex_unlock_ramlist(void)
1258 qemu_mutex_unlock(&ram_list.mutex);
1261 #ifdef __linux__
1262 static void *file_ram_alloc(RAMBlock *block,
1263 ram_addr_t memory,
1264 const char *path,
1265 Error **errp)
1267 bool unlink_on_error = false;
1268 char *filename;
1269 char *sanitized_name;
1270 char *c;
1271 void *area;
1272 int fd = -1;
1273 int64_t page_size;
1275 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1276 error_setg(errp,
1277 "host lacks kvm mmu notifiers, -mem-path unsupported");
1278 return NULL;
1281 for (;;) {
1282 fd = open(path, O_RDWR);
1283 if (fd >= 0) {
1284 /* @path names an existing file, use it */
1285 break;
1287 if (errno == ENOENT) {
1288 /* @path names a file that doesn't exist, create it */
1289 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1290 if (fd >= 0) {
1291 unlink_on_error = true;
1292 break;
1294 } else if (errno == EISDIR) {
1295 /* @path names a directory, create a file there */
1296 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1297 sanitized_name = g_strdup(memory_region_name(block->mr));
1298 for (c = sanitized_name; *c != '\0'; c++) {
1299 if (*c == '/') {
1300 *c = '_';
1304 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1305 sanitized_name);
1306 g_free(sanitized_name);
1308 fd = mkstemp(filename);
1309 if (fd >= 0) {
1310 unlink(filename);
1311 g_free(filename);
1312 break;
1314 g_free(filename);
1316 if (errno != EEXIST && errno != EINTR) {
1317 error_setg_errno(errp, errno,
1318 "can't open backing store %s for guest RAM",
1319 path);
1320 goto error;
1323 * Try again on EINTR and EEXIST. The latter happens when
1324 * something else creates the file between our two open().
1328 page_size = qemu_fd_getpagesize(fd);
1329 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1331 if (memory < page_size) {
1332 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1333 "or larger than page size 0x%" PRIx64,
1334 memory, page_size);
1335 goto error;
1338 memory = ROUND_UP(memory, page_size);
1341 * ftruncate is not supported by hugetlbfs in older
1342 * hosts, so don't bother bailing out on errors.
1343 * If anything goes wrong with it under other filesystems,
1344 * mmap will fail.
1346 if (ftruncate(fd, memory)) {
1347 perror("ftruncate");
1350 area = qemu_ram_mmap(fd, memory, block->mr->align,
1351 block->flags & RAM_SHARED);
1352 if (area == MAP_FAILED) {
1353 error_setg_errno(errp, errno,
1354 "unable to map backing store for guest RAM");
1355 goto error;
1358 if (mem_prealloc) {
1359 os_mem_prealloc(fd, area, memory);
1362 block->fd = fd;
1363 return area;
1365 error:
1366 if (unlink_on_error) {
1367 unlink(path);
1369 if (fd != -1) {
1370 close(fd);
1372 return NULL;
1374 #endif
1376 /* Called with the ramlist lock held. */
1377 static ram_addr_t find_ram_offset(ram_addr_t size)
1379 RAMBlock *block, *next_block;
1380 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1382 assert(size != 0); /* it would hand out same offset multiple times */
1384 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1385 return 0;
1388 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1389 ram_addr_t end, next = RAM_ADDR_MAX;
1391 end = block->offset + block->max_length;
1393 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1394 if (next_block->offset >= end) {
1395 next = MIN(next, next_block->offset);
1398 if (next - end >= size && next - end < mingap) {
1399 offset = end;
1400 mingap = next - end;
1404 if (offset == RAM_ADDR_MAX) {
1405 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1406 (uint64_t)size);
1407 abort();
1410 return offset;
1413 ram_addr_t last_ram_offset(void)
1415 RAMBlock *block;
1416 ram_addr_t last = 0;
1418 rcu_read_lock();
1419 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1420 last = MAX(last, block->offset + block->max_length);
1422 rcu_read_unlock();
1423 return last;
1426 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1428 int ret;
1430 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1431 if (!machine_dump_guest_core(current_machine)) {
1432 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1433 if (ret) {
1434 perror("qemu_madvise");
1435 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1436 "but dump_guest_core=off specified\n");
1441 const char *qemu_ram_get_idstr(RAMBlock *rb)
1443 return rb->idstr;
1446 /* Called with iothread lock held. */
1447 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1449 RAMBlock *block;
1451 assert(new_block);
1452 assert(!new_block->idstr[0]);
1454 if (dev) {
1455 char *id = qdev_get_dev_path(dev);
1456 if (id) {
1457 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1458 g_free(id);
1461 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1463 rcu_read_lock();
1464 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1465 if (block != new_block &&
1466 !strcmp(block->idstr, new_block->idstr)) {
1467 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1468 new_block->idstr);
1469 abort();
1472 rcu_read_unlock();
1475 /* Called with iothread lock held. */
1476 void qemu_ram_unset_idstr(RAMBlock *block)
1478 /* FIXME: arch_init.c assumes that this is not called throughout
1479 * migration. Ignore the problem since hot-unplug during migration
1480 * does not work anyway.
1482 if (block) {
1483 memset(block->idstr, 0, sizeof(block->idstr));
1487 static int memory_try_enable_merging(void *addr, size_t len)
1489 if (!machine_mem_merge(current_machine)) {
1490 /* disabled by the user */
1491 return 0;
1494 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1497 /* Only legal before guest might have detected the memory size: e.g. on
1498 * incoming migration, or right after reset.
1500 * As memory core doesn't know how is memory accessed, it is up to
1501 * resize callback to update device state and/or add assertions to detect
1502 * misuse, if necessary.
1504 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1506 assert(block);
1508 newsize = HOST_PAGE_ALIGN(newsize);
1510 if (block->used_length == newsize) {
1511 return 0;
1514 if (!(block->flags & RAM_RESIZEABLE)) {
1515 error_setg_errno(errp, EINVAL,
1516 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1517 " in != 0x" RAM_ADDR_FMT, block->idstr,
1518 newsize, block->used_length);
1519 return -EINVAL;
1522 if (block->max_length < newsize) {
1523 error_setg_errno(errp, EINVAL,
1524 "Length too large: %s: 0x" RAM_ADDR_FMT
1525 " > 0x" RAM_ADDR_FMT, block->idstr,
1526 newsize, block->max_length);
1527 return -EINVAL;
1530 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1531 block->used_length = newsize;
1532 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1533 DIRTY_CLIENTS_ALL);
1534 memory_region_set_size(block->mr, newsize);
1535 if (block->resized) {
1536 block->resized(block->idstr, newsize, block->host);
1538 return 0;
1541 /* Called with ram_list.mutex held */
1542 static void dirty_memory_extend(ram_addr_t old_ram_size,
1543 ram_addr_t new_ram_size)
1545 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1546 DIRTY_MEMORY_BLOCK_SIZE);
1547 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1548 DIRTY_MEMORY_BLOCK_SIZE);
1549 int i;
1551 /* Only need to extend if block count increased */
1552 if (new_num_blocks <= old_num_blocks) {
1553 return;
1556 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1557 DirtyMemoryBlocks *old_blocks;
1558 DirtyMemoryBlocks *new_blocks;
1559 int j;
1561 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1562 new_blocks = g_malloc(sizeof(*new_blocks) +
1563 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1565 if (old_num_blocks) {
1566 memcpy(new_blocks->blocks, old_blocks->blocks,
1567 old_num_blocks * sizeof(old_blocks->blocks[0]));
1570 for (j = old_num_blocks; j < new_num_blocks; j++) {
1571 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1574 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1576 if (old_blocks) {
1577 g_free_rcu(old_blocks, rcu);
1582 static void ram_block_add(RAMBlock *new_block, Error **errp)
1584 RAMBlock *block;
1585 RAMBlock *last_block = NULL;
1586 ram_addr_t old_ram_size, new_ram_size;
1587 Error *err = NULL;
1589 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1591 qemu_mutex_lock_ramlist();
1592 new_block->offset = find_ram_offset(new_block->max_length);
1594 if (!new_block->host) {
1595 if (xen_enabled()) {
1596 xen_ram_alloc(new_block->offset, new_block->max_length,
1597 new_block->mr, &err);
1598 if (err) {
1599 error_propagate(errp, err);
1600 qemu_mutex_unlock_ramlist();
1601 return;
1603 } else {
1604 new_block->host = phys_mem_alloc(new_block->max_length,
1605 &new_block->mr->align);
1606 if (!new_block->host) {
1607 error_setg_errno(errp, errno,
1608 "cannot set up guest memory '%s'",
1609 memory_region_name(new_block->mr));
1610 qemu_mutex_unlock_ramlist();
1611 return;
1613 memory_try_enable_merging(new_block->host, new_block->max_length);
1617 new_ram_size = MAX(old_ram_size,
1618 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1619 if (new_ram_size > old_ram_size) {
1620 migration_bitmap_extend(old_ram_size, new_ram_size);
1621 dirty_memory_extend(old_ram_size, new_ram_size);
1623 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1624 * QLIST (which has an RCU-friendly variant) does not have insertion at
1625 * tail, so save the last element in last_block.
1627 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1628 last_block = block;
1629 if (block->max_length < new_block->max_length) {
1630 break;
1633 if (block) {
1634 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1635 } else if (last_block) {
1636 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1637 } else { /* list is empty */
1638 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1640 ram_list.mru_block = NULL;
1642 /* Write list before version */
1643 smp_wmb();
1644 ram_list.version++;
1645 qemu_mutex_unlock_ramlist();
1647 cpu_physical_memory_set_dirty_range(new_block->offset,
1648 new_block->used_length,
1649 DIRTY_CLIENTS_ALL);
1651 if (new_block->host) {
1652 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1653 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1654 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1655 if (kvm_enabled()) {
1656 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1661 #ifdef __linux__
1662 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1663 bool share, const char *mem_path,
1664 Error **errp)
1666 RAMBlock *new_block;
1667 Error *local_err = NULL;
1669 if (xen_enabled()) {
1670 error_setg(errp, "-mem-path not supported with Xen");
1671 return NULL;
1674 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1676 * file_ram_alloc() needs to allocate just like
1677 * phys_mem_alloc, but we haven't bothered to provide
1678 * a hook there.
1680 error_setg(errp,
1681 "-mem-path not supported with this accelerator");
1682 return NULL;
1685 size = HOST_PAGE_ALIGN(size);
1686 new_block = g_malloc0(sizeof(*new_block));
1687 new_block->mr = mr;
1688 new_block->used_length = size;
1689 new_block->max_length = size;
1690 new_block->flags = share ? RAM_SHARED : 0;
1691 new_block->host = file_ram_alloc(new_block, size,
1692 mem_path, errp);
1693 if (!new_block->host) {
1694 g_free(new_block);
1695 return NULL;
1698 ram_block_add(new_block, &local_err);
1699 if (local_err) {
1700 g_free(new_block);
1701 error_propagate(errp, local_err);
1702 return NULL;
1704 return new_block;
1706 #endif
1708 static
1709 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1710 void (*resized)(const char*,
1711 uint64_t length,
1712 void *host),
1713 void *host, bool resizeable,
1714 MemoryRegion *mr, Error **errp)
1716 RAMBlock *new_block;
1717 Error *local_err = NULL;
1719 size = HOST_PAGE_ALIGN(size);
1720 max_size = HOST_PAGE_ALIGN(max_size);
1721 new_block = g_malloc0(sizeof(*new_block));
1722 new_block->mr = mr;
1723 new_block->resized = resized;
1724 new_block->used_length = size;
1725 new_block->max_length = max_size;
1726 assert(max_size >= size);
1727 new_block->fd = -1;
1728 new_block->host = host;
1729 if (host) {
1730 new_block->flags |= RAM_PREALLOC;
1732 if (resizeable) {
1733 new_block->flags |= RAM_RESIZEABLE;
1735 ram_block_add(new_block, &local_err);
1736 if (local_err) {
1737 g_free(new_block);
1738 error_propagate(errp, local_err);
1739 return NULL;
1741 return new_block;
1744 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1745 MemoryRegion *mr, Error **errp)
1747 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1750 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1752 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1755 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1756 void (*resized)(const char*,
1757 uint64_t length,
1758 void *host),
1759 MemoryRegion *mr, Error **errp)
1761 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1764 static void reclaim_ramblock(RAMBlock *block)
1766 if (block->flags & RAM_PREALLOC) {
1768 } else if (xen_enabled()) {
1769 xen_invalidate_map_cache_entry(block->host);
1770 #ifndef _WIN32
1771 } else if (block->fd >= 0) {
1772 qemu_ram_munmap(block->host, block->max_length);
1773 close(block->fd);
1774 #endif
1775 } else {
1776 qemu_anon_ram_free(block->host, block->max_length);
1778 g_free(block);
1781 void qemu_ram_free(RAMBlock *block)
1783 if (!block) {
1784 return;
1787 qemu_mutex_lock_ramlist();
1788 QLIST_REMOVE_RCU(block, next);
1789 ram_list.mru_block = NULL;
1790 /* Write list before version */
1791 smp_wmb();
1792 ram_list.version++;
1793 call_rcu(block, reclaim_ramblock, rcu);
1794 qemu_mutex_unlock_ramlist();
1797 #ifndef _WIN32
1798 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1800 RAMBlock *block;
1801 ram_addr_t offset;
1802 int flags;
1803 void *area, *vaddr;
1805 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1806 offset = addr - block->offset;
1807 if (offset < block->max_length) {
1808 vaddr = ramblock_ptr(block, offset);
1809 if (block->flags & RAM_PREALLOC) {
1811 } else if (xen_enabled()) {
1812 abort();
1813 } else {
1814 flags = MAP_FIXED;
1815 if (block->fd >= 0) {
1816 flags |= (block->flags & RAM_SHARED ?
1817 MAP_SHARED : MAP_PRIVATE);
1818 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1819 flags, block->fd, offset);
1820 } else {
1822 * Remap needs to match alloc. Accelerators that
1823 * set phys_mem_alloc never remap. If they did,
1824 * we'd need a remap hook here.
1826 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1828 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1829 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1830 flags, -1, 0);
1832 if (area != vaddr) {
1833 fprintf(stderr, "Could not remap addr: "
1834 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1835 length, addr);
1836 exit(1);
1838 memory_try_enable_merging(vaddr, length);
1839 qemu_ram_setup_dump(vaddr, length);
1844 #endif /* !_WIN32 */
1846 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1847 * This should not be used for general purpose DMA. Use address_space_map
1848 * or address_space_rw instead. For local memory (e.g. video ram) that the
1849 * device owns, use memory_region_get_ram_ptr.
1851 * Called within RCU critical section.
1853 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1855 RAMBlock *block = ram_block;
1857 if (block == NULL) {
1858 block = qemu_get_ram_block(addr);
1859 addr -= block->offset;
1862 if (xen_enabled() && block->host == NULL) {
1863 /* We need to check if the requested address is in the RAM
1864 * because we don't want to map the entire memory in QEMU.
1865 * In that case just map until the end of the page.
1867 if (block->offset == 0) {
1868 return xen_map_cache(addr, 0, 0);
1871 block->host = xen_map_cache(block->offset, block->max_length, 1);
1873 return ramblock_ptr(block, addr);
1876 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1877 * but takes a size argument.
1879 * Called within RCU critical section.
1881 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1882 hwaddr *size)
1884 RAMBlock *block = ram_block;
1885 if (*size == 0) {
1886 return NULL;
1889 if (block == NULL) {
1890 block = qemu_get_ram_block(addr);
1891 addr -= block->offset;
1893 *size = MIN(*size, block->max_length - addr);
1895 if (xen_enabled() && block->host == NULL) {
1896 /* We need to check if the requested address is in the RAM
1897 * because we don't want to map the entire memory in QEMU.
1898 * In that case just map the requested area.
1900 if (block->offset == 0) {
1901 return xen_map_cache(addr, *size, 1);
1904 block->host = xen_map_cache(block->offset, block->max_length, 1);
1907 return ramblock_ptr(block, addr);
1911 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1912 * in that RAMBlock.
1914 * ptr: Host pointer to look up
1915 * round_offset: If true round the result offset down to a page boundary
1916 * *ram_addr: set to result ram_addr
1917 * *offset: set to result offset within the RAMBlock
1919 * Returns: RAMBlock (or NULL if not found)
1921 * By the time this function returns, the returned pointer is not protected
1922 * by RCU anymore. If the caller is not within an RCU critical section and
1923 * does not hold the iothread lock, it must have other means of protecting the
1924 * pointer, such as a reference to the region that includes the incoming
1925 * ram_addr_t.
1927 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1928 ram_addr_t *offset)
1930 RAMBlock *block;
1931 uint8_t *host = ptr;
1933 if (xen_enabled()) {
1934 ram_addr_t ram_addr;
1935 rcu_read_lock();
1936 ram_addr = xen_ram_addr_from_mapcache(ptr);
1937 block = qemu_get_ram_block(ram_addr);
1938 if (block) {
1939 *offset = ram_addr - block->offset;
1941 rcu_read_unlock();
1942 return block;
1945 rcu_read_lock();
1946 block = atomic_rcu_read(&ram_list.mru_block);
1947 if (block && block->host && host - block->host < block->max_length) {
1948 goto found;
1951 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1952 /* This case append when the block is not mapped. */
1953 if (block->host == NULL) {
1954 continue;
1956 if (host - block->host < block->max_length) {
1957 goto found;
1961 rcu_read_unlock();
1962 return NULL;
1964 found:
1965 *offset = (host - block->host);
1966 if (round_offset) {
1967 *offset &= TARGET_PAGE_MASK;
1969 rcu_read_unlock();
1970 return block;
1974 * Finds the named RAMBlock
1976 * name: The name of RAMBlock to find
1978 * Returns: RAMBlock (or NULL if not found)
1980 RAMBlock *qemu_ram_block_by_name(const char *name)
1982 RAMBlock *block;
1984 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1985 if (!strcmp(name, block->idstr)) {
1986 return block;
1990 return NULL;
1993 /* Some of the softmmu routines need to translate from a host pointer
1994 (typically a TLB entry) back to a ram offset. */
1995 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1997 RAMBlock *block;
1998 ram_addr_t offset;
2000 block = qemu_ram_block_from_host(ptr, false, &offset);
2001 if (!block) {
2002 return RAM_ADDR_INVALID;
2005 return block->offset + offset;
2008 /* Called within RCU critical section. */
2009 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2010 uint64_t val, unsigned size)
2012 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2013 tb_invalidate_phys_page_fast(ram_addr, size);
2015 switch (size) {
2016 case 1:
2017 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2018 break;
2019 case 2:
2020 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2021 break;
2022 case 4:
2023 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2024 break;
2025 default:
2026 abort();
2028 /* Set both VGA and migration bits for simplicity and to remove
2029 * the notdirty callback faster.
2031 cpu_physical_memory_set_dirty_range(ram_addr, size,
2032 DIRTY_CLIENTS_NOCODE);
2033 /* we remove the notdirty callback only if the code has been
2034 flushed */
2035 if (!cpu_physical_memory_is_clean(ram_addr)) {
2036 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2040 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2041 unsigned size, bool is_write)
2043 return is_write;
2046 static const MemoryRegionOps notdirty_mem_ops = {
2047 .write = notdirty_mem_write,
2048 .valid.accepts = notdirty_mem_accepts,
2049 .endianness = DEVICE_NATIVE_ENDIAN,
2052 /* Generate a debug exception if a watchpoint has been hit. */
2053 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2055 CPUState *cpu = current_cpu;
2056 CPUClass *cc = CPU_GET_CLASS(cpu);
2057 CPUArchState *env = cpu->env_ptr;
2058 target_ulong pc, cs_base;
2059 target_ulong vaddr;
2060 CPUWatchpoint *wp;
2061 uint32_t cpu_flags;
2063 if (cpu->watchpoint_hit) {
2064 /* We re-entered the check after replacing the TB. Now raise
2065 * the debug interrupt so that is will trigger after the
2066 * current instruction. */
2067 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2068 return;
2070 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2071 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2072 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2073 && (wp->flags & flags)) {
2074 if (flags == BP_MEM_READ) {
2075 wp->flags |= BP_WATCHPOINT_HIT_READ;
2076 } else {
2077 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2079 wp->hitaddr = vaddr;
2080 wp->hitattrs = attrs;
2081 if (!cpu->watchpoint_hit) {
2082 if (wp->flags & BP_CPU &&
2083 !cc->debug_check_watchpoint(cpu, wp)) {
2084 wp->flags &= ~BP_WATCHPOINT_HIT;
2085 continue;
2087 cpu->watchpoint_hit = wp;
2088 tb_check_watchpoint(cpu);
2089 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2090 cpu->exception_index = EXCP_DEBUG;
2091 cpu_loop_exit(cpu);
2092 } else {
2093 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2094 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2095 cpu_loop_exit_noexc(cpu);
2098 } else {
2099 wp->flags &= ~BP_WATCHPOINT_HIT;
2104 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2105 so these check for a hit then pass through to the normal out-of-line
2106 phys routines. */
2107 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2108 unsigned size, MemTxAttrs attrs)
2110 MemTxResult res;
2111 uint64_t data;
2112 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2113 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2115 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2116 switch (size) {
2117 case 1:
2118 data = address_space_ldub(as, addr, attrs, &res);
2119 break;
2120 case 2:
2121 data = address_space_lduw(as, addr, attrs, &res);
2122 break;
2123 case 4:
2124 data = address_space_ldl(as, addr, attrs, &res);
2125 break;
2126 default: abort();
2128 *pdata = data;
2129 return res;
2132 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2133 uint64_t val, unsigned size,
2134 MemTxAttrs attrs)
2136 MemTxResult res;
2137 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2138 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2140 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2141 switch (size) {
2142 case 1:
2143 address_space_stb(as, addr, val, attrs, &res);
2144 break;
2145 case 2:
2146 address_space_stw(as, addr, val, attrs, &res);
2147 break;
2148 case 4:
2149 address_space_stl(as, addr, val, attrs, &res);
2150 break;
2151 default: abort();
2153 return res;
2156 static const MemoryRegionOps watch_mem_ops = {
2157 .read_with_attrs = watch_mem_read,
2158 .write_with_attrs = watch_mem_write,
2159 .endianness = DEVICE_NATIVE_ENDIAN,
2162 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2163 unsigned len, MemTxAttrs attrs)
2165 subpage_t *subpage = opaque;
2166 uint8_t buf[8];
2167 MemTxResult res;
2169 #if defined(DEBUG_SUBPAGE)
2170 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2171 subpage, len, addr);
2172 #endif
2173 res = address_space_read(subpage->as, addr + subpage->base,
2174 attrs, buf, len);
2175 if (res) {
2176 return res;
2178 switch (len) {
2179 case 1:
2180 *data = ldub_p(buf);
2181 return MEMTX_OK;
2182 case 2:
2183 *data = lduw_p(buf);
2184 return MEMTX_OK;
2185 case 4:
2186 *data = ldl_p(buf);
2187 return MEMTX_OK;
2188 case 8:
2189 *data = ldq_p(buf);
2190 return MEMTX_OK;
2191 default:
2192 abort();
2196 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2197 uint64_t value, unsigned len, MemTxAttrs attrs)
2199 subpage_t *subpage = opaque;
2200 uint8_t buf[8];
2202 #if defined(DEBUG_SUBPAGE)
2203 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2204 " value %"PRIx64"\n",
2205 __func__, subpage, len, addr, value);
2206 #endif
2207 switch (len) {
2208 case 1:
2209 stb_p(buf, value);
2210 break;
2211 case 2:
2212 stw_p(buf, value);
2213 break;
2214 case 4:
2215 stl_p(buf, value);
2216 break;
2217 case 8:
2218 stq_p(buf, value);
2219 break;
2220 default:
2221 abort();
2223 return address_space_write(subpage->as, addr + subpage->base,
2224 attrs, buf, len);
2227 static bool subpage_accepts(void *opaque, hwaddr addr,
2228 unsigned len, bool is_write)
2230 subpage_t *subpage = opaque;
2231 #if defined(DEBUG_SUBPAGE)
2232 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2233 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2234 #endif
2236 return address_space_access_valid(subpage->as, addr + subpage->base,
2237 len, is_write);
2240 static const MemoryRegionOps subpage_ops = {
2241 .read_with_attrs = subpage_read,
2242 .write_with_attrs = subpage_write,
2243 .impl.min_access_size = 1,
2244 .impl.max_access_size = 8,
2245 .valid.min_access_size = 1,
2246 .valid.max_access_size = 8,
2247 .valid.accepts = subpage_accepts,
2248 .endianness = DEVICE_NATIVE_ENDIAN,
2251 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2252 uint16_t section)
2254 int idx, eidx;
2256 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2257 return -1;
2258 idx = SUBPAGE_IDX(start);
2259 eidx = SUBPAGE_IDX(end);
2260 #if defined(DEBUG_SUBPAGE)
2261 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2262 __func__, mmio, start, end, idx, eidx, section);
2263 #endif
2264 for (; idx <= eidx; idx++) {
2265 mmio->sub_section[idx] = section;
2268 return 0;
2271 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2273 subpage_t *mmio;
2275 mmio = g_malloc0(sizeof(subpage_t));
2277 mmio->as = as;
2278 mmio->base = base;
2279 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2280 NULL, TARGET_PAGE_SIZE);
2281 mmio->iomem.subpage = true;
2282 #if defined(DEBUG_SUBPAGE)
2283 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2284 mmio, base, TARGET_PAGE_SIZE);
2285 #endif
2286 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2288 return mmio;
2291 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2292 MemoryRegion *mr)
2294 assert(as);
2295 MemoryRegionSection section = {
2296 .address_space = as,
2297 .mr = mr,
2298 .offset_within_address_space = 0,
2299 .offset_within_region = 0,
2300 .size = int128_2_64(),
2303 return phys_section_add(map, &section);
2306 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2308 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2309 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2310 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2311 MemoryRegionSection *sections = d->map.sections;
2313 return sections[index & ~TARGET_PAGE_MASK].mr;
2316 static void io_mem_init(void)
2318 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2319 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2320 NULL, UINT64_MAX);
2321 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2322 NULL, UINT64_MAX);
2323 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2324 NULL, UINT64_MAX);
2327 static void mem_begin(MemoryListener *listener)
2329 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2330 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2331 uint16_t n;
2333 n = dummy_section(&d->map, as, &io_mem_unassigned);
2334 assert(n == PHYS_SECTION_UNASSIGNED);
2335 n = dummy_section(&d->map, as, &io_mem_notdirty);
2336 assert(n == PHYS_SECTION_NOTDIRTY);
2337 n = dummy_section(&d->map, as, &io_mem_rom);
2338 assert(n == PHYS_SECTION_ROM);
2339 n = dummy_section(&d->map, as, &io_mem_watch);
2340 assert(n == PHYS_SECTION_WATCH);
2342 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2343 d->as = as;
2344 as->next_dispatch = d;
2347 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2349 phys_sections_free(&d->map);
2350 g_free(d);
2353 static void mem_commit(MemoryListener *listener)
2355 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2356 AddressSpaceDispatch *cur = as->dispatch;
2357 AddressSpaceDispatch *next = as->next_dispatch;
2359 phys_page_compact_all(next, next->map.nodes_nb);
2361 atomic_rcu_set(&as->dispatch, next);
2362 if (cur) {
2363 call_rcu(cur, address_space_dispatch_free, rcu);
2367 static void tcg_commit(MemoryListener *listener)
2369 CPUAddressSpace *cpuas;
2370 AddressSpaceDispatch *d;
2372 /* since each CPU stores ram addresses in its TLB cache, we must
2373 reset the modified entries */
2374 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2375 cpu_reloading_memory_map();
2376 /* The CPU and TLB are protected by the iothread lock.
2377 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2378 * may have split the RCU critical section.
2380 d = atomic_rcu_read(&cpuas->as->dispatch);
2381 cpuas->memory_dispatch = d;
2382 tlb_flush(cpuas->cpu, 1);
2385 void address_space_init_dispatch(AddressSpace *as)
2387 as->dispatch = NULL;
2388 as->dispatch_listener = (MemoryListener) {
2389 .begin = mem_begin,
2390 .commit = mem_commit,
2391 .region_add = mem_add,
2392 .region_nop = mem_add,
2393 .priority = 0,
2395 memory_listener_register(&as->dispatch_listener, as);
2398 void address_space_unregister(AddressSpace *as)
2400 memory_listener_unregister(&as->dispatch_listener);
2403 void address_space_destroy_dispatch(AddressSpace *as)
2405 AddressSpaceDispatch *d = as->dispatch;
2407 atomic_rcu_set(&as->dispatch, NULL);
2408 if (d) {
2409 call_rcu(d, address_space_dispatch_free, rcu);
2413 static void memory_map_init(void)
2415 system_memory = g_malloc(sizeof(*system_memory));
2417 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2418 address_space_init(&address_space_memory, system_memory, "memory");
2420 system_io = g_malloc(sizeof(*system_io));
2421 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2422 65536);
2423 address_space_init(&address_space_io, system_io, "I/O");
2426 MemoryRegion *get_system_memory(void)
2428 return system_memory;
2431 MemoryRegion *get_system_io(void)
2433 return system_io;
2436 #endif /* !defined(CONFIG_USER_ONLY) */
2438 /* physical memory access (slow version, mainly for debug) */
2439 #if defined(CONFIG_USER_ONLY)
2440 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2441 uint8_t *buf, int len, int is_write)
2443 int l, flags;
2444 target_ulong page;
2445 void * p;
2447 while (len > 0) {
2448 page = addr & TARGET_PAGE_MASK;
2449 l = (page + TARGET_PAGE_SIZE) - addr;
2450 if (l > len)
2451 l = len;
2452 flags = page_get_flags(page);
2453 if (!(flags & PAGE_VALID))
2454 return -1;
2455 if (is_write) {
2456 if (!(flags & PAGE_WRITE))
2457 return -1;
2458 /* XXX: this code should not depend on lock_user */
2459 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2460 return -1;
2461 memcpy(p, buf, l);
2462 unlock_user(p, addr, l);
2463 } else {
2464 if (!(flags & PAGE_READ))
2465 return -1;
2466 /* XXX: this code should not depend on lock_user */
2467 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2468 return -1;
2469 memcpy(buf, p, l);
2470 unlock_user(p, addr, 0);
2472 len -= l;
2473 buf += l;
2474 addr += l;
2476 return 0;
2479 #else
2481 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2482 hwaddr length)
2484 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2485 addr += memory_region_get_ram_addr(mr);
2487 /* No early return if dirty_log_mask is or becomes 0, because
2488 * cpu_physical_memory_set_dirty_range will still call
2489 * xen_modified_memory.
2491 if (dirty_log_mask) {
2492 dirty_log_mask =
2493 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2495 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2496 tb_invalidate_phys_range(addr, addr + length);
2497 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2499 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2502 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2504 unsigned access_size_max = mr->ops->valid.max_access_size;
2506 /* Regions are assumed to support 1-4 byte accesses unless
2507 otherwise specified. */
2508 if (access_size_max == 0) {
2509 access_size_max = 4;
2512 /* Bound the maximum access by the alignment of the address. */
2513 if (!mr->ops->impl.unaligned) {
2514 unsigned align_size_max = addr & -addr;
2515 if (align_size_max != 0 && align_size_max < access_size_max) {
2516 access_size_max = align_size_max;
2520 /* Don't attempt accesses larger than the maximum. */
2521 if (l > access_size_max) {
2522 l = access_size_max;
2524 l = pow2floor(l);
2526 return l;
2529 static bool prepare_mmio_access(MemoryRegion *mr)
2531 bool unlocked = !qemu_mutex_iothread_locked();
2532 bool release_lock = false;
2534 if (unlocked && mr->global_locking) {
2535 qemu_mutex_lock_iothread();
2536 unlocked = false;
2537 release_lock = true;
2539 if (mr->flush_coalesced_mmio) {
2540 if (unlocked) {
2541 qemu_mutex_lock_iothread();
2543 qemu_flush_coalesced_mmio_buffer();
2544 if (unlocked) {
2545 qemu_mutex_unlock_iothread();
2549 return release_lock;
2552 /* Called within RCU critical section. */
2553 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2554 MemTxAttrs attrs,
2555 const uint8_t *buf,
2556 int len, hwaddr addr1,
2557 hwaddr l, MemoryRegion *mr)
2559 uint8_t *ptr;
2560 uint64_t val;
2561 MemTxResult result = MEMTX_OK;
2562 bool release_lock = false;
2564 for (;;) {
2565 if (!memory_access_is_direct(mr, true)) {
2566 release_lock |= prepare_mmio_access(mr);
2567 l = memory_access_size(mr, l, addr1);
2568 /* XXX: could force current_cpu to NULL to avoid
2569 potential bugs */
2570 switch (l) {
2571 case 8:
2572 /* 64 bit write access */
2573 val = ldq_p(buf);
2574 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2575 attrs);
2576 break;
2577 case 4:
2578 /* 32 bit write access */
2579 val = ldl_p(buf);
2580 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2581 attrs);
2582 break;
2583 case 2:
2584 /* 16 bit write access */
2585 val = lduw_p(buf);
2586 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2587 attrs);
2588 break;
2589 case 1:
2590 /* 8 bit write access */
2591 val = ldub_p(buf);
2592 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2593 attrs);
2594 break;
2595 default:
2596 abort();
2598 } else {
2599 /* RAM case */
2600 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2601 memcpy(ptr, buf, l);
2602 invalidate_and_set_dirty(mr, addr1, l);
2605 if (release_lock) {
2606 qemu_mutex_unlock_iothread();
2607 release_lock = false;
2610 len -= l;
2611 buf += l;
2612 addr += l;
2614 if (!len) {
2615 break;
2618 l = len;
2619 mr = address_space_translate(as, addr, &addr1, &l, true);
2622 return result;
2625 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2626 const uint8_t *buf, int len)
2628 hwaddr l;
2629 hwaddr addr1;
2630 MemoryRegion *mr;
2631 MemTxResult result = MEMTX_OK;
2633 if (len > 0) {
2634 rcu_read_lock();
2635 l = len;
2636 mr = address_space_translate(as, addr, &addr1, &l, true);
2637 result = address_space_write_continue(as, addr, attrs, buf, len,
2638 addr1, l, mr);
2639 rcu_read_unlock();
2642 return result;
2645 /* Called within RCU critical section. */
2646 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2647 MemTxAttrs attrs, uint8_t *buf,
2648 int len, hwaddr addr1, hwaddr l,
2649 MemoryRegion *mr)
2651 uint8_t *ptr;
2652 uint64_t val;
2653 MemTxResult result = MEMTX_OK;
2654 bool release_lock = false;
2656 for (;;) {
2657 if (!memory_access_is_direct(mr, false)) {
2658 /* I/O case */
2659 release_lock |= prepare_mmio_access(mr);
2660 l = memory_access_size(mr, l, addr1);
2661 switch (l) {
2662 case 8:
2663 /* 64 bit read access */
2664 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2665 attrs);
2666 stq_p(buf, val);
2667 break;
2668 case 4:
2669 /* 32 bit read access */
2670 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2671 attrs);
2672 stl_p(buf, val);
2673 break;
2674 case 2:
2675 /* 16 bit read access */
2676 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2677 attrs);
2678 stw_p(buf, val);
2679 break;
2680 case 1:
2681 /* 8 bit read access */
2682 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2683 attrs);
2684 stb_p(buf, val);
2685 break;
2686 default:
2687 abort();
2689 } else {
2690 /* RAM case */
2691 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2692 memcpy(buf, ptr, l);
2695 if (release_lock) {
2696 qemu_mutex_unlock_iothread();
2697 release_lock = false;
2700 len -= l;
2701 buf += l;
2702 addr += l;
2704 if (!len) {
2705 break;
2708 l = len;
2709 mr = address_space_translate(as, addr, &addr1, &l, false);
2712 return result;
2715 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2716 MemTxAttrs attrs, uint8_t *buf, int len)
2718 hwaddr l;
2719 hwaddr addr1;
2720 MemoryRegion *mr;
2721 MemTxResult result = MEMTX_OK;
2723 if (len > 0) {
2724 rcu_read_lock();
2725 l = len;
2726 mr = address_space_translate(as, addr, &addr1, &l, false);
2727 result = address_space_read_continue(as, addr, attrs, buf, len,
2728 addr1, l, mr);
2729 rcu_read_unlock();
2732 return result;
2735 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2736 uint8_t *buf, int len, bool is_write)
2738 if (is_write) {
2739 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2740 } else {
2741 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2745 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2746 int len, int is_write)
2748 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2749 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2757 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2758 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2760 hwaddr l;
2761 uint8_t *ptr;
2762 hwaddr addr1;
2763 MemoryRegion *mr;
2765 rcu_read_lock();
2766 while (len > 0) {
2767 l = len;
2768 mr = address_space_translate(as, addr, &addr1, &l, true);
2770 if (!(memory_region_is_ram(mr) ||
2771 memory_region_is_romd(mr))) {
2772 l = memory_access_size(mr, l, addr1);
2773 } else {
2774 /* ROM/RAM case */
2775 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2776 switch (type) {
2777 case WRITE_DATA:
2778 memcpy(ptr, buf, l);
2779 invalidate_and_set_dirty(mr, addr1, l);
2780 break;
2781 case FLUSH_CACHE:
2782 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2783 break;
2786 len -= l;
2787 buf += l;
2788 addr += l;
2790 rcu_read_unlock();
2793 /* used for ROM loading : can write in RAM and ROM */
2794 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2795 const uint8_t *buf, int len)
2797 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2800 void cpu_flush_icache_range(hwaddr start, int len)
2803 * This function should do the same thing as an icache flush that was
2804 * triggered from within the guest. For TCG we are always cache coherent,
2805 * so there is no need to flush anything. For KVM / Xen we need to flush
2806 * the host's instruction cache at least.
2808 if (tcg_enabled()) {
2809 return;
2812 cpu_physical_memory_write_rom_internal(&address_space_memory,
2813 start, NULL, len, FLUSH_CACHE);
2816 typedef struct {
2817 MemoryRegion *mr;
2818 void *buffer;
2819 hwaddr addr;
2820 hwaddr len;
2821 bool in_use;
2822 } BounceBuffer;
2824 static BounceBuffer bounce;
2826 typedef struct MapClient {
2827 QEMUBH *bh;
2828 QLIST_ENTRY(MapClient) link;
2829 } MapClient;
2831 QemuMutex map_client_list_lock;
2832 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2833 = QLIST_HEAD_INITIALIZER(map_client_list);
2835 static void cpu_unregister_map_client_do(MapClient *client)
2837 QLIST_REMOVE(client, link);
2838 g_free(client);
2841 static void cpu_notify_map_clients_locked(void)
2843 MapClient *client;
2845 while (!QLIST_EMPTY(&map_client_list)) {
2846 client = QLIST_FIRST(&map_client_list);
2847 qemu_bh_schedule(client->bh);
2848 cpu_unregister_map_client_do(client);
2852 void cpu_register_map_client(QEMUBH *bh)
2854 MapClient *client = g_malloc(sizeof(*client));
2856 qemu_mutex_lock(&map_client_list_lock);
2857 client->bh = bh;
2858 QLIST_INSERT_HEAD(&map_client_list, client, link);
2859 if (!atomic_read(&bounce.in_use)) {
2860 cpu_notify_map_clients_locked();
2862 qemu_mutex_unlock(&map_client_list_lock);
2865 void cpu_exec_init_all(void)
2867 qemu_mutex_init(&ram_list.mutex);
2868 io_mem_init();
2869 memory_map_init();
2870 qemu_mutex_init(&map_client_list_lock);
2873 void cpu_unregister_map_client(QEMUBH *bh)
2875 MapClient *client;
2877 qemu_mutex_lock(&map_client_list_lock);
2878 QLIST_FOREACH(client, &map_client_list, link) {
2879 if (client->bh == bh) {
2880 cpu_unregister_map_client_do(client);
2881 break;
2884 qemu_mutex_unlock(&map_client_list_lock);
2887 static void cpu_notify_map_clients(void)
2889 qemu_mutex_lock(&map_client_list_lock);
2890 cpu_notify_map_clients_locked();
2891 qemu_mutex_unlock(&map_client_list_lock);
2894 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2896 MemoryRegion *mr;
2897 hwaddr l, xlat;
2899 rcu_read_lock();
2900 while (len > 0) {
2901 l = len;
2902 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2903 if (!memory_access_is_direct(mr, is_write)) {
2904 l = memory_access_size(mr, l, addr);
2905 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2906 return false;
2910 len -= l;
2911 addr += l;
2913 rcu_read_unlock();
2914 return true;
2917 /* Map a physical memory region into a host virtual address.
2918 * May map a subset of the requested range, given by and returned in *plen.
2919 * May return NULL if resources needed to perform the mapping are exhausted.
2920 * Use only for reads OR writes - not for read-modify-write operations.
2921 * Use cpu_register_map_client() to know when retrying the map operation is
2922 * likely to succeed.
2924 void *address_space_map(AddressSpace *as,
2925 hwaddr addr,
2926 hwaddr *plen,
2927 bool is_write)
2929 hwaddr len = *plen;
2930 hwaddr done = 0;
2931 hwaddr l, xlat, base;
2932 MemoryRegion *mr, *this_mr;
2933 void *ptr;
2935 if (len == 0) {
2936 return NULL;
2939 l = len;
2940 rcu_read_lock();
2941 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2943 if (!memory_access_is_direct(mr, is_write)) {
2944 if (atomic_xchg(&bounce.in_use, true)) {
2945 rcu_read_unlock();
2946 return NULL;
2948 /* Avoid unbounded allocations */
2949 l = MIN(l, TARGET_PAGE_SIZE);
2950 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2951 bounce.addr = addr;
2952 bounce.len = l;
2954 memory_region_ref(mr);
2955 bounce.mr = mr;
2956 if (!is_write) {
2957 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2958 bounce.buffer, l);
2961 rcu_read_unlock();
2962 *plen = l;
2963 return bounce.buffer;
2966 base = xlat;
2968 for (;;) {
2969 len -= l;
2970 addr += l;
2971 done += l;
2972 if (len == 0) {
2973 break;
2976 l = len;
2977 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2978 if (this_mr != mr || xlat != base + done) {
2979 break;
2983 memory_region_ref(mr);
2984 *plen = done;
2985 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2986 rcu_read_unlock();
2988 return ptr;
2991 /* Unmaps a memory region previously mapped by address_space_map().
2992 * Will also mark the memory as dirty if is_write == 1. access_len gives
2993 * the amount of memory that was actually read or written by the caller.
2995 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2996 int is_write, hwaddr access_len)
2998 if (buffer != bounce.buffer) {
2999 MemoryRegion *mr;
3000 ram_addr_t addr1;
3002 mr = memory_region_from_host(buffer, &addr1);
3003 assert(mr != NULL);
3004 if (is_write) {
3005 invalidate_and_set_dirty(mr, addr1, access_len);
3007 if (xen_enabled()) {
3008 xen_invalidate_map_cache_entry(buffer);
3010 memory_region_unref(mr);
3011 return;
3013 if (is_write) {
3014 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3015 bounce.buffer, access_len);
3017 qemu_vfree(bounce.buffer);
3018 bounce.buffer = NULL;
3019 memory_region_unref(bounce.mr);
3020 atomic_mb_set(&bounce.in_use, false);
3021 cpu_notify_map_clients();
3024 void *cpu_physical_memory_map(hwaddr addr,
3025 hwaddr *plen,
3026 int is_write)
3028 return address_space_map(&address_space_memory, addr, plen, is_write);
3031 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3032 int is_write, hwaddr access_len)
3034 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3037 /* warning: addr must be aligned */
3038 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3039 MemTxAttrs attrs,
3040 MemTxResult *result,
3041 enum device_endian endian)
3043 uint8_t *ptr;
3044 uint64_t val;
3045 MemoryRegion *mr;
3046 hwaddr l = 4;
3047 hwaddr addr1;
3048 MemTxResult r;
3049 bool release_lock = false;
3051 rcu_read_lock();
3052 mr = address_space_translate(as, addr, &addr1, &l, false);
3053 if (l < 4 || !memory_access_is_direct(mr, false)) {
3054 release_lock |= prepare_mmio_access(mr);
3056 /* I/O case */
3057 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3058 #if defined(TARGET_WORDS_BIGENDIAN)
3059 if (endian == DEVICE_LITTLE_ENDIAN) {
3060 val = bswap32(val);
3062 #else
3063 if (endian == DEVICE_BIG_ENDIAN) {
3064 val = bswap32(val);
3066 #endif
3067 } else {
3068 /* RAM case */
3069 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3070 switch (endian) {
3071 case DEVICE_LITTLE_ENDIAN:
3072 val = ldl_le_p(ptr);
3073 break;
3074 case DEVICE_BIG_ENDIAN:
3075 val = ldl_be_p(ptr);
3076 break;
3077 default:
3078 val = ldl_p(ptr);
3079 break;
3081 r = MEMTX_OK;
3083 if (result) {
3084 *result = r;
3086 if (release_lock) {
3087 qemu_mutex_unlock_iothread();
3089 rcu_read_unlock();
3090 return val;
3093 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3094 MemTxAttrs attrs, MemTxResult *result)
3096 return address_space_ldl_internal(as, addr, attrs, result,
3097 DEVICE_NATIVE_ENDIAN);
3100 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3101 MemTxAttrs attrs, MemTxResult *result)
3103 return address_space_ldl_internal(as, addr, attrs, result,
3104 DEVICE_LITTLE_ENDIAN);
3107 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3108 MemTxAttrs attrs, MemTxResult *result)
3110 return address_space_ldl_internal(as, addr, attrs, result,
3111 DEVICE_BIG_ENDIAN);
3114 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3116 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3119 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3121 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3124 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3126 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3129 /* warning: addr must be aligned */
3130 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3131 MemTxAttrs attrs,
3132 MemTxResult *result,
3133 enum device_endian endian)
3135 uint8_t *ptr;
3136 uint64_t val;
3137 MemoryRegion *mr;
3138 hwaddr l = 8;
3139 hwaddr addr1;
3140 MemTxResult r;
3141 bool release_lock = false;
3143 rcu_read_lock();
3144 mr = address_space_translate(as, addr, &addr1, &l,
3145 false);
3146 if (l < 8 || !memory_access_is_direct(mr, false)) {
3147 release_lock |= prepare_mmio_access(mr);
3149 /* I/O case */
3150 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3151 #if defined(TARGET_WORDS_BIGENDIAN)
3152 if (endian == DEVICE_LITTLE_ENDIAN) {
3153 val = bswap64(val);
3155 #else
3156 if (endian == DEVICE_BIG_ENDIAN) {
3157 val = bswap64(val);
3159 #endif
3160 } else {
3161 /* RAM case */
3162 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3163 switch (endian) {
3164 case DEVICE_LITTLE_ENDIAN:
3165 val = ldq_le_p(ptr);
3166 break;
3167 case DEVICE_BIG_ENDIAN:
3168 val = ldq_be_p(ptr);
3169 break;
3170 default:
3171 val = ldq_p(ptr);
3172 break;
3174 r = MEMTX_OK;
3176 if (result) {
3177 *result = r;
3179 if (release_lock) {
3180 qemu_mutex_unlock_iothread();
3182 rcu_read_unlock();
3183 return val;
3186 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3187 MemTxAttrs attrs, MemTxResult *result)
3189 return address_space_ldq_internal(as, addr, attrs, result,
3190 DEVICE_NATIVE_ENDIAN);
3193 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3194 MemTxAttrs attrs, MemTxResult *result)
3196 return address_space_ldq_internal(as, addr, attrs, result,
3197 DEVICE_LITTLE_ENDIAN);
3200 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3201 MemTxAttrs attrs, MemTxResult *result)
3203 return address_space_ldq_internal(as, addr, attrs, result,
3204 DEVICE_BIG_ENDIAN);
3207 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3209 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3212 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3214 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3217 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3219 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3222 /* XXX: optimize */
3223 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3224 MemTxAttrs attrs, MemTxResult *result)
3226 uint8_t val;
3227 MemTxResult r;
3229 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3230 if (result) {
3231 *result = r;
3233 return val;
3236 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3238 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3241 /* warning: addr must be aligned */
3242 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3243 hwaddr addr,
3244 MemTxAttrs attrs,
3245 MemTxResult *result,
3246 enum device_endian endian)
3248 uint8_t *ptr;
3249 uint64_t val;
3250 MemoryRegion *mr;
3251 hwaddr l = 2;
3252 hwaddr addr1;
3253 MemTxResult r;
3254 bool release_lock = false;
3256 rcu_read_lock();
3257 mr = address_space_translate(as, addr, &addr1, &l,
3258 false);
3259 if (l < 2 || !memory_access_is_direct(mr, false)) {
3260 release_lock |= prepare_mmio_access(mr);
3262 /* I/O case */
3263 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3264 #if defined(TARGET_WORDS_BIGENDIAN)
3265 if (endian == DEVICE_LITTLE_ENDIAN) {
3266 val = bswap16(val);
3268 #else
3269 if (endian == DEVICE_BIG_ENDIAN) {
3270 val = bswap16(val);
3272 #endif
3273 } else {
3274 /* RAM case */
3275 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3276 switch (endian) {
3277 case DEVICE_LITTLE_ENDIAN:
3278 val = lduw_le_p(ptr);
3279 break;
3280 case DEVICE_BIG_ENDIAN:
3281 val = lduw_be_p(ptr);
3282 break;
3283 default:
3284 val = lduw_p(ptr);
3285 break;
3287 r = MEMTX_OK;
3289 if (result) {
3290 *result = r;
3292 if (release_lock) {
3293 qemu_mutex_unlock_iothread();
3295 rcu_read_unlock();
3296 return val;
3299 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3300 MemTxAttrs attrs, MemTxResult *result)
3302 return address_space_lduw_internal(as, addr, attrs, result,
3303 DEVICE_NATIVE_ENDIAN);
3306 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3307 MemTxAttrs attrs, MemTxResult *result)
3309 return address_space_lduw_internal(as, addr, attrs, result,
3310 DEVICE_LITTLE_ENDIAN);
3313 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3314 MemTxAttrs attrs, MemTxResult *result)
3316 return address_space_lduw_internal(as, addr, attrs, result,
3317 DEVICE_BIG_ENDIAN);
3320 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3322 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3325 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3327 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3330 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3332 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3335 /* warning: addr must be aligned. The ram page is not masked as dirty
3336 and the code inside is not invalidated. It is useful if the dirty
3337 bits are used to track modified PTEs */
3338 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3339 MemTxAttrs attrs, MemTxResult *result)
3341 uint8_t *ptr;
3342 MemoryRegion *mr;
3343 hwaddr l = 4;
3344 hwaddr addr1;
3345 MemTxResult r;
3346 uint8_t dirty_log_mask;
3347 bool release_lock = false;
3349 rcu_read_lock();
3350 mr = address_space_translate(as, addr, &addr1, &l,
3351 true);
3352 if (l < 4 || !memory_access_is_direct(mr, true)) {
3353 release_lock |= prepare_mmio_access(mr);
3355 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3356 } else {
3357 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3358 stl_p(ptr, val);
3360 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3361 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3362 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3363 4, dirty_log_mask);
3364 r = MEMTX_OK;
3366 if (result) {
3367 *result = r;
3369 if (release_lock) {
3370 qemu_mutex_unlock_iothread();
3372 rcu_read_unlock();
3375 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3377 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3380 /* warning: addr must be aligned */
3381 static inline void address_space_stl_internal(AddressSpace *as,
3382 hwaddr addr, uint32_t val,
3383 MemTxAttrs attrs,
3384 MemTxResult *result,
3385 enum device_endian endian)
3387 uint8_t *ptr;
3388 MemoryRegion *mr;
3389 hwaddr l = 4;
3390 hwaddr addr1;
3391 MemTxResult r;
3392 bool release_lock = false;
3394 rcu_read_lock();
3395 mr = address_space_translate(as, addr, &addr1, &l,
3396 true);
3397 if (l < 4 || !memory_access_is_direct(mr, true)) {
3398 release_lock |= prepare_mmio_access(mr);
3400 #if defined(TARGET_WORDS_BIGENDIAN)
3401 if (endian == DEVICE_LITTLE_ENDIAN) {
3402 val = bswap32(val);
3404 #else
3405 if (endian == DEVICE_BIG_ENDIAN) {
3406 val = bswap32(val);
3408 #endif
3409 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3410 } else {
3411 /* RAM case */
3412 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3413 switch (endian) {
3414 case DEVICE_LITTLE_ENDIAN:
3415 stl_le_p(ptr, val);
3416 break;
3417 case DEVICE_BIG_ENDIAN:
3418 stl_be_p(ptr, val);
3419 break;
3420 default:
3421 stl_p(ptr, val);
3422 break;
3424 invalidate_and_set_dirty(mr, addr1, 4);
3425 r = MEMTX_OK;
3427 if (result) {
3428 *result = r;
3430 if (release_lock) {
3431 qemu_mutex_unlock_iothread();
3433 rcu_read_unlock();
3436 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3437 MemTxAttrs attrs, MemTxResult *result)
3439 address_space_stl_internal(as, addr, val, attrs, result,
3440 DEVICE_NATIVE_ENDIAN);
3443 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3444 MemTxAttrs attrs, MemTxResult *result)
3446 address_space_stl_internal(as, addr, val, attrs, result,
3447 DEVICE_LITTLE_ENDIAN);
3450 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3451 MemTxAttrs attrs, MemTxResult *result)
3453 address_space_stl_internal(as, addr, val, attrs, result,
3454 DEVICE_BIG_ENDIAN);
3457 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3459 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3462 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3464 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3467 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3469 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3472 /* XXX: optimize */
3473 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3474 MemTxAttrs attrs, MemTxResult *result)
3476 uint8_t v = val;
3477 MemTxResult r;
3479 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3480 if (result) {
3481 *result = r;
3485 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3487 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3490 /* warning: addr must be aligned */
3491 static inline void address_space_stw_internal(AddressSpace *as,
3492 hwaddr addr, uint32_t val,
3493 MemTxAttrs attrs,
3494 MemTxResult *result,
3495 enum device_endian endian)
3497 uint8_t *ptr;
3498 MemoryRegion *mr;
3499 hwaddr l = 2;
3500 hwaddr addr1;
3501 MemTxResult r;
3502 bool release_lock = false;
3504 rcu_read_lock();
3505 mr = address_space_translate(as, addr, &addr1, &l, true);
3506 if (l < 2 || !memory_access_is_direct(mr, true)) {
3507 release_lock |= prepare_mmio_access(mr);
3509 #if defined(TARGET_WORDS_BIGENDIAN)
3510 if (endian == DEVICE_LITTLE_ENDIAN) {
3511 val = bswap16(val);
3513 #else
3514 if (endian == DEVICE_BIG_ENDIAN) {
3515 val = bswap16(val);
3517 #endif
3518 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3519 } else {
3520 /* RAM case */
3521 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3522 switch (endian) {
3523 case DEVICE_LITTLE_ENDIAN:
3524 stw_le_p(ptr, val);
3525 break;
3526 case DEVICE_BIG_ENDIAN:
3527 stw_be_p(ptr, val);
3528 break;
3529 default:
3530 stw_p(ptr, val);
3531 break;
3533 invalidate_and_set_dirty(mr, addr1, 2);
3534 r = MEMTX_OK;
3536 if (result) {
3537 *result = r;
3539 if (release_lock) {
3540 qemu_mutex_unlock_iothread();
3542 rcu_read_unlock();
3545 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3546 MemTxAttrs attrs, MemTxResult *result)
3548 address_space_stw_internal(as, addr, val, attrs, result,
3549 DEVICE_NATIVE_ENDIAN);
3552 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3553 MemTxAttrs attrs, MemTxResult *result)
3555 address_space_stw_internal(as, addr, val, attrs, result,
3556 DEVICE_LITTLE_ENDIAN);
3559 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3560 MemTxAttrs attrs, MemTxResult *result)
3562 address_space_stw_internal(as, addr, val, attrs, result,
3563 DEVICE_BIG_ENDIAN);
3566 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3568 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3571 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3573 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3576 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3578 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3581 /* XXX: optimize */
3582 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3583 MemTxAttrs attrs, MemTxResult *result)
3585 MemTxResult r;
3586 val = tswap64(val);
3587 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3588 if (result) {
3589 *result = r;
3593 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3594 MemTxAttrs attrs, MemTxResult *result)
3596 MemTxResult r;
3597 val = cpu_to_le64(val);
3598 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3599 if (result) {
3600 *result = r;
3603 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3604 MemTxAttrs attrs, MemTxResult *result)
3606 MemTxResult r;
3607 val = cpu_to_be64(val);
3608 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3609 if (result) {
3610 *result = r;
3614 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3616 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3619 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3621 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3624 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3626 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3629 /* virtual memory access for debug (includes writing to ROM) */
3630 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3631 uint8_t *buf, int len, int is_write)
3633 int l;
3634 hwaddr phys_addr;
3635 target_ulong page;
3637 while (len > 0) {
3638 int asidx;
3639 MemTxAttrs attrs;
3641 page = addr & TARGET_PAGE_MASK;
3642 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3643 asidx = cpu_asidx_from_attrs(cpu, attrs);
3644 /* if no physical page mapped, return an error */
3645 if (phys_addr == -1)
3646 return -1;
3647 l = (page + TARGET_PAGE_SIZE) - addr;
3648 if (l > len)
3649 l = len;
3650 phys_addr += (addr & ~TARGET_PAGE_MASK);
3651 if (is_write) {
3652 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3653 phys_addr, buf, l);
3654 } else {
3655 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3656 MEMTXATTRS_UNSPECIFIED,
3657 buf, l, 0);
3659 len -= l;
3660 buf += l;
3661 addr += l;
3663 return 0;
3667 * Allows code that needs to deal with migration bitmaps etc to still be built
3668 * target independent.
3670 size_t qemu_target_page_bits(void)
3672 return TARGET_PAGE_BITS;
3675 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time, false
 * otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3691 #ifndef CONFIG_USER_ONLY
3692 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3694 MemoryRegion*mr;
3695 hwaddr l = 1;
3696 bool res;
3698 rcu_read_lock();
3699 mr = address_space_translate(&address_space_memory,
3700 phys_addr, &phys_addr, &l, false);
3702 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3703 rcu_read_unlock();
3704 return res;
3707 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3709 RAMBlock *block;
3710 int ret = 0;
3712 rcu_read_lock();
3713 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3714 ret = func(block->idstr, block->host, block->offset,
3715 block->used_length, opaque);
3716 if (ret) {
3717 break;
3720 rcu_read_unlock();
3721 return ret;
3723 #endif