block: assert that bs->request_alignment is a power of 2
[qemu/kevin.git] / exec.c
blobf2c9e374f5f541f1ea882dbc2f1a9328cba76fb8
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #include <sys/mman.h>
23 #endif
25 #include "qemu/cutils.h"
26 #include "cpu.h"
27 #include "exec/exec-all.h"
28 #include "tcg.h"
29 #include "hw/qdev-core.h"
30 #if !defined(CONFIG_USER_ONLY)
31 #include "hw/boards.h"
32 #include "hw/xen/xen.h"
33 #endif
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "migration/vmstate.h"
62 #include "qemu/range.h"
63 #ifndef _WIN32
64 #include "qemu/mmap-alloc.h"
65 #endif
67 //#define DEBUG_SUBPAGE
69 #if !defined(CONFIG_USER_ONLY)
70 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
71 * are protected by the ramlist lock.
73 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
75 static MemoryRegion *system_memory;
76 static MemoryRegion *system_io;
78 AddressSpace address_space_io;
79 AddressSpace address_space_memory;
81 MemoryRegion io_mem_rom, io_mem_notdirty;
82 static MemoryRegion io_mem_unassigned;
84 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
85 #define RAM_PREALLOC (1 << 0)
87 /* RAM is mmap-ed with MAP_SHARED */
88 #define RAM_SHARED (1 << 1)
90 /* Only a portion of RAM (used_length) is actually used, and migrated.
91 * This used_length size can change across reboots.
93 #define RAM_RESIZEABLE (1 << 2)
95 #endif
97 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
98 /* current CPU in the current thread. It is only valid inside
99 cpu_exec() */
100 __thread CPUState *current_cpu;
101 /* 0 = Do not count executed instructions.
102 1 = Precise instruction counting.
103 2 = Adaptive rate instruction counting. */
104 int use_icount;
106 #if !defined(CONFIG_USER_ONLY)
108 typedef struct PhysPageEntry PhysPageEntry;
/* One slot of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /*
     * Number of levels to skip to reach the next table (in units of
     * L2_SIZE); a value of 0 marks a leaf.
     */
    uint32_t skip : 6;
    /*
     * Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
117 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
119 /* Size of the L2 (and L3, etc) page tables. */
120 #define ADDR_SPACE_BITS 64
122 #define P_L2_BITS 9
123 #define P_L2_SIZE (1 << P_L2_BITS)
125 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
127 typedef PhysPageEntry Node[P_L2_SIZE];
129 typedef struct PhysPageMap {
130 struct rcu_head rcu;
132 unsigned sections_nb;
133 unsigned sections_nb_alloc;
134 unsigned nodes_nb;
135 unsigned nodes_nb_alloc;
136 Node *nodes;
137 MemoryRegionSection *sections;
138 } PhysPageMap;
140 struct AddressSpaceDispatch {
141 struct rcu_head rcu;
143 MemoryRegionSection *mru_section;
144 /* This is a multi-level map on the physical address space.
145 * The bottom level has pointers to MemoryRegionSections.
147 PhysPageEntry phys_map;
148 PhysPageMap map;
149 AddressSpace *as;
152 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
153 typedef struct subpage_t {
154 MemoryRegion iomem;
155 AddressSpace *as;
156 hwaddr base;
157 uint16_t sub_section[TARGET_PAGE_SIZE];
158 } subpage_t;
160 #define PHYS_SECTION_UNASSIGNED 0
161 #define PHYS_SECTION_NOTDIRTY 1
162 #define PHYS_SECTION_ROM 2
163 #define PHYS_SECTION_WATCH 3
165 static void io_mem_init(void);
166 static void memory_map_init(void);
167 static void tcg_commit(MemoryListener *listener);
169 static MemoryRegion io_mem_watch;
172 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
173 * @cpu: the CPU whose AddressSpace this is
174 * @as: the AddressSpace itself
175 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
176 * @tcg_as_listener: listener for tracking changes to the AddressSpace
178 struct CPUAddressSpace {
179 CPUState *cpu;
180 AddressSpace *as;
181 struct AddressSpaceDispatch *memory_dispatch;
182 MemoryListener tcg_as_listener;
185 #endif
187 #if !defined(CONFIG_USER_ONLY)
189 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
198 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
200 unsigned i;
201 uint32_t ret;
202 PhysPageEntry e;
203 PhysPageEntry *p;
205 ret = map->nodes_nb++;
206 p = map->nodes[ret];
207 assert(ret != PHYS_MAP_NODE_NIL);
208 assert(ret != map->nodes_nb_alloc);
210 e.skip = leaf ? 0 : 1;
211 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
212 for (i = 0; i < P_L2_SIZE; ++i) {
213 memcpy(&p[i], &e, sizeof(e));
215 return ret;
218 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
219 hwaddr *index, hwaddr *nb, uint16_t leaf,
220 int level)
222 PhysPageEntry *p;
223 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
225 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
226 lp->ptr = phys_map_node_alloc(map, level == 0);
228 p = map->nodes[lp->ptr];
229 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
231 while (*nb && lp < &p[P_L2_SIZE]) {
232 if ((*index & (step - 1)) == 0 && *nb >= step) {
233 lp->skip = 0;
234 lp->ptr = leaf;
235 *index += step;
236 *nb -= step;
237 } else {
238 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
240 ++lp;
244 static void phys_page_set(AddressSpaceDispatch *d,
245 hwaddr index, hwaddr nb,
246 uint16_t leaf)
248 /* Wildly overreserve - it doesn't matter much. */
249 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
251 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
254 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
255 * and update our entry so we can skip it and go directly to the destination.
257 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
259 unsigned valid_ptr = P_L2_SIZE;
260 int valid = 0;
261 PhysPageEntry *p;
262 int i;
264 if (lp->ptr == PHYS_MAP_NODE_NIL) {
265 return;
268 p = nodes[lp->ptr];
269 for (i = 0; i < P_L2_SIZE; i++) {
270 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
271 continue;
274 valid_ptr = i;
275 valid++;
276 if (p[i].skip) {
277 phys_page_compact(&p[i], nodes, compacted);
281 /* We can only compress if there's only one child. */
282 if (valid != 1) {
283 return;
286 assert(valid_ptr < P_L2_SIZE);
288 /* Don't compress if it won't fit in the # of bits we have. */
289 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
290 return;
293 lp->ptr = p[valid_ptr].ptr;
294 if (!p[valid_ptr].skip) {
295 /* If our only child is a leaf, make this a leaf. */
296 /* By design, we should have made this node a leaf to begin with so we
297 * should never reach here.
298 * But since it's so simple to handle this, let's do it just in case we
299 * change this rule.
301 lp->skip = 0;
302 } else {
303 lp->skip += p[valid_ptr].skip;
307 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
309 DECLARE_BITMAP(compacted, nodes_nb);
311 if (d->phys_map.skip) {
312 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
316 static inline bool section_covers_addr(const MemoryRegionSection *section,
317 hwaddr addr)
319 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
320 * the section must cover the entire address space.
322 return section->size.hi ||
323 range_covers_byte(section->offset_within_address_space,
324 section->size.lo, addr);
327 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
328 Node *nodes, MemoryRegionSection *sections)
330 PhysPageEntry *p;
331 hwaddr index = addr >> TARGET_PAGE_BITS;
332 int i;
334 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
335 if (lp.ptr == PHYS_MAP_NODE_NIL) {
336 return &sections[PHYS_SECTION_UNASSIGNED];
338 p = nodes[lp.ptr];
339 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
342 if (section_covers_addr(&sections[lp.ptr], addr)) {
343 return &sections[lp.ptr];
344 } else {
345 return &sections[PHYS_SECTION_UNASSIGNED];
349 bool memory_region_is_unassigned(MemoryRegion *mr)
351 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
352 && mr != &io_mem_watch;
355 /* Called from RCU critical section */
356 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
357 hwaddr addr,
358 bool resolve_subpage)
360 MemoryRegionSection *section = atomic_read(&d->mru_section);
361 subpage_t *subpage;
362 bool update;
364 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
365 section_covers_addr(section, addr)) {
366 update = false;
367 } else {
368 section = phys_page_find(d->phys_map, addr, d->map.nodes,
369 d->map.sections);
370 update = true;
372 if (resolve_subpage && section->mr->subpage) {
373 subpage = container_of(section->mr, subpage_t, iomem);
374 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
376 if (update) {
377 atomic_set(&d->mru_section, section);
379 return section;
382 /* Called from RCU critical section */
383 static MemoryRegionSection *
384 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
385 hwaddr *plen, bool resolve_subpage)
387 MemoryRegionSection *section;
388 MemoryRegion *mr;
389 Int128 diff;
391 section = address_space_lookup_region(d, addr, resolve_subpage);
392 /* Compute offset within MemoryRegionSection */
393 addr -= section->offset_within_address_space;
395 /* Compute offset within MemoryRegion */
396 *xlat = addr + section->offset_within_region;
398 mr = section->mr;
400 /* MMIO registers can be expected to perform full-width accesses based only
401 * on their address, without considering adjacent registers that could
402 * decode to completely different MemoryRegions. When such registers
403 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
404 * regions overlap wildly. For this reason we cannot clamp the accesses
405 * here.
407 * If the length is small (as is the case for address_space_ldl/stl),
408 * everything works fine. If the incoming length is large, however,
409 * the caller really has to do the clamping through memory_access_size.
411 if (memory_region_is_ram(mr)) {
412 diff = int128_sub(section->size, int128_make64(addr));
413 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
415 return section;
418 /* Called from RCU critical section */
419 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
420 hwaddr *xlat, hwaddr *plen,
421 bool is_write)
423 IOMMUTLBEntry iotlb;
424 MemoryRegionSection *section;
425 MemoryRegion *mr;
427 for (;;) {
428 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
429 section = address_space_translate_internal(d, addr, &addr, plen, true);
430 mr = section->mr;
432 if (!mr->iommu_ops) {
433 break;
436 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
437 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
438 | (addr & iotlb.addr_mask));
439 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
440 if (!(iotlb.perm & (1 << is_write))) {
441 mr = &io_mem_unassigned;
442 break;
445 as = iotlb.target_as;
448 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
449 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
450 *plen = MIN(page, *plen);
453 *xlat = addr;
454 return mr;
457 /* Called from RCU critical section */
458 MemoryRegionSection *
459 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
460 hwaddr *xlat, hwaddr *plen)
462 MemoryRegionSection *section;
463 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
465 section = address_space_translate_internal(d, addr, xlat, plen, false);
467 assert(!section->mr->iommu_ops);
468 return section;
470 #endif
472 #if !defined(CONFIG_USER_ONLY)
474 static int cpu_common_post_load(void *opaque, int version_id)
476 CPUState *cpu = opaque;
478 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
479 version_id is increased. */
480 cpu->interrupt_request &= ~0x01;
481 tlb_flush(cpu, 1);
483 return 0;
486 static int cpu_common_pre_load(void *opaque)
488 CPUState *cpu = opaque;
490 cpu->exception_index = -1;
492 return 0;
495 static bool cpu_common_exception_index_needed(void *opaque)
497 CPUState *cpu = opaque;
499 return tcg_enabled() && cpu->exception_index != -1;
502 static const VMStateDescription vmstate_cpu_common_exception_index = {
503 .name = "cpu_common/exception_index",
504 .version_id = 1,
505 .minimum_version_id = 1,
506 .needed = cpu_common_exception_index_needed,
507 .fields = (VMStateField[]) {
508 VMSTATE_INT32(exception_index, CPUState),
509 VMSTATE_END_OF_LIST()
513 static bool cpu_common_crash_occurred_needed(void *opaque)
515 CPUState *cpu = opaque;
517 return cpu->crash_occurred;
520 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
521 .name = "cpu_common/crash_occurred",
522 .version_id = 1,
523 .minimum_version_id = 1,
524 .needed = cpu_common_crash_occurred_needed,
525 .fields = (VMStateField[]) {
526 VMSTATE_BOOL(crash_occurred, CPUState),
527 VMSTATE_END_OF_LIST()
531 const VMStateDescription vmstate_cpu_common = {
532 .name = "cpu_common",
533 .version_id = 1,
534 .minimum_version_id = 1,
535 .pre_load = cpu_common_pre_load,
536 .post_load = cpu_common_post_load,
537 .fields = (VMStateField[]) {
538 VMSTATE_UINT32(halted, CPUState),
539 VMSTATE_UINT32(interrupt_request, CPUState),
540 VMSTATE_END_OF_LIST()
542 .subsections = (const VMStateDescription*[]) {
543 &vmstate_cpu_common_exception_index,
544 &vmstate_cpu_common_crash_occurred,
545 NULL
549 #endif
551 CPUState *qemu_get_cpu(int index)
553 CPUState *cpu;
555 CPU_FOREACH(cpu) {
556 if (cpu->cpu_index == index) {
557 return cpu;
561 return NULL;
564 #if !defined(CONFIG_USER_ONLY)
565 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
567 CPUAddressSpace *newas;
569 /* Target code should have set num_ases before calling us */
570 assert(asidx < cpu->num_ases);
572 if (asidx == 0) {
573 /* address space 0 gets the convenience alias */
574 cpu->as = as;
577 /* KVM cannot currently support multiple address spaces. */
578 assert(asidx == 0 || !kvm_enabled());
580 if (!cpu->cpu_ases) {
581 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
584 newas = &cpu->cpu_ases[asidx];
585 newas->cpu = cpu;
586 newas->as = as;
587 if (tcg_enabled()) {
588 newas->tcg_as_listener.commit = tcg_commit;
589 memory_listener_register(&newas->tcg_as_listener, as);
593 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
595 /* Return the AddressSpace corresponding to the specified index */
596 return cpu->cpu_ases[asidx].as;
598 #endif
600 #ifndef CONFIG_USER_ONLY
601 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
603 static int cpu_get_free_index(Error **errp)
605 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
607 if (cpu >= MAX_CPUMASK_BITS) {
608 error_setg(errp, "Trying to use more CPUs than max of %d",
609 MAX_CPUMASK_BITS);
610 return -1;
613 bitmap_set(cpu_index_map, cpu, 1);
614 return cpu;
617 static void cpu_release_index(CPUState *cpu)
619 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
621 #else
623 static int cpu_get_free_index(Error **errp)
625 CPUState *some_cpu;
626 int cpu_index = 0;
628 CPU_FOREACH(some_cpu) {
629 cpu_index++;
631 return cpu_index;
634 static void cpu_release_index(CPUState *cpu)
636 return;
638 #endif
640 void cpu_exec_exit(CPUState *cpu)
642 CPUClass *cc = CPU_GET_CLASS(cpu);
644 #if defined(CONFIG_USER_ONLY)
645 cpu_list_lock();
646 #endif
647 if (cpu->cpu_index == -1) {
648 /* cpu_index was never allocated by this @cpu or was already freed. */
649 #if defined(CONFIG_USER_ONLY)
650 cpu_list_unlock();
651 #endif
652 return;
655 QTAILQ_REMOVE(&cpus, cpu, node);
656 cpu_release_index(cpu);
657 cpu->cpu_index = -1;
658 #if defined(CONFIG_USER_ONLY)
659 cpu_list_unlock();
660 #endif
662 if (cc->vmsd != NULL) {
663 vmstate_unregister(NULL, cc->vmsd, cpu);
665 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
666 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
670 void cpu_exec_init(CPUState *cpu, Error **errp)
672 CPUClass *cc = CPU_GET_CLASS(cpu);
673 Error *local_err = NULL;
675 cpu->as = NULL;
676 cpu->num_ases = 0;
678 #ifndef CONFIG_USER_ONLY
679 cpu->thread_id = qemu_get_thread_id();
681 /* This is a softmmu CPU object, so create a property for it
682 * so users can wire up its memory. (This can't go in qom/cpu.c
683 * because that file is compiled only once for both user-mode
684 * and system builds.) The default if no link is set up is to use
685 * the system address space.
687 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
688 (Object **)&cpu->memory,
689 qdev_prop_allow_set_link_before_realize,
690 OBJ_PROP_LINK_UNREF_ON_RELEASE,
691 &error_abort);
692 cpu->memory = system_memory;
693 object_ref(OBJECT(cpu->memory));
694 #endif
696 #if defined(CONFIG_USER_ONLY)
697 cpu_list_lock();
698 #endif
699 cpu->cpu_index = cpu_get_free_index(&local_err);
700 if (local_err) {
701 error_propagate(errp, local_err);
702 #if defined(CONFIG_USER_ONLY)
703 cpu_list_unlock();
704 #endif
705 return;
707 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
708 #if defined(CONFIG_USER_ONLY)
709 (void) cc;
710 cpu_list_unlock();
711 #else
712 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
713 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
715 if (cc->vmsd != NULL) {
716 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
718 #endif
721 #if defined(CONFIG_USER_ONLY)
722 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
724 tb_invalidate_phys_page_range(pc, pc + 1, 0);
726 #else
727 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
729 MemTxAttrs attrs;
730 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
731 int asidx = cpu_asidx_from_attrs(cpu, attrs);
732 if (phys != -1) {
733 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
734 phys | (pc & ~TARGET_PAGE_MASK));
737 #endif
739 #if defined(CONFIG_USER_ONLY)
740 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
745 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
746 int flags)
748 return -ENOSYS;
751 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
755 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
756 int flags, CPUWatchpoint **watchpoint)
758 return -ENOSYS;
760 #else
761 /* Add a watchpoint. */
762 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
763 int flags, CPUWatchpoint **watchpoint)
765 CPUWatchpoint *wp;
767 /* forbid ranges which are empty or run off the end of the address space */
768 if (len == 0 || (addr + len - 1) < addr) {
769 error_report("tried to set invalid watchpoint at %"
770 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
771 return -EINVAL;
773 wp = g_malloc(sizeof(*wp));
775 wp->vaddr = addr;
776 wp->len = len;
777 wp->flags = flags;
779 /* keep all GDB-injected watchpoints in front */
780 if (flags & BP_GDB) {
781 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
782 } else {
783 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
786 tlb_flush_page(cpu, addr);
788 if (watchpoint)
789 *watchpoint = wp;
790 return 0;
793 /* Remove a specific watchpoint. */
794 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
795 int flags)
797 CPUWatchpoint *wp;
799 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
800 if (addr == wp->vaddr && len == wp->len
801 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
802 cpu_watchpoint_remove_by_ref(cpu, wp);
803 return 0;
806 return -ENOENT;
809 /* Remove a specific watchpoint by reference. */
810 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
812 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
814 tlb_flush_page(cpu, watchpoint->vaddr);
816 g_free(watchpoint);
819 /* Remove all matching watchpoints. */
820 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
822 CPUWatchpoint *wp, *next;
824 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
825 if (wp->flags & mask) {
826 cpu_watchpoint_remove_by_ref(cpu, wp);
831 /* Return true if this watchpoint address matches the specified
832 * access (ie the address range covered by the watchpoint overlaps
833 * partially or completely with the address range covered by the
834 * access).
836 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
837 vaddr addr,
838 vaddr len)
840 /* We know the lengths are non-zero, but a little caution is
841 * required to avoid errors in the case where the range ends
842 * exactly at the top of the address space and so addr + len
843 * wraps round to zero.
845 vaddr wpend = wp->vaddr + wp->len - 1;
846 vaddr addrend = addr + len - 1;
848 return !(addr > wpend || wp->vaddr > addrend);
851 #endif
853 /* Add a breakpoint. */
854 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
855 CPUBreakpoint **breakpoint)
857 CPUBreakpoint *bp;
859 bp = g_malloc(sizeof(*bp));
861 bp->pc = pc;
862 bp->flags = flags;
864 /* keep all GDB-injected breakpoints in front */
865 if (flags & BP_GDB) {
866 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
867 } else {
868 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
871 breakpoint_invalidate(cpu, pc);
873 if (breakpoint) {
874 *breakpoint = bp;
876 return 0;
879 /* Remove a specific breakpoint. */
880 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
882 CPUBreakpoint *bp;
884 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
885 if (bp->pc == pc && bp->flags == flags) {
886 cpu_breakpoint_remove_by_ref(cpu, bp);
887 return 0;
890 return -ENOENT;
893 /* Remove a specific breakpoint by reference. */
894 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
896 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
898 breakpoint_invalidate(cpu, breakpoint->pc);
900 g_free(breakpoint);
903 /* Remove all matching breakpoints. */
904 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
906 CPUBreakpoint *bp, *next;
908 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
909 if (bp->flags & mask) {
910 cpu_breakpoint_remove_by_ref(cpu, bp);
915 /* enable or disable single step mode. EXCP_DEBUG is returned by the
916 CPU loop after each instruction */
917 void cpu_single_step(CPUState *cpu, int enabled)
919 if (cpu->singlestep_enabled != enabled) {
920 cpu->singlestep_enabled = enabled;
921 if (kvm_enabled()) {
922 kvm_update_guest_debug(cpu, 0);
923 } else {
924 /* must flush all the translated code to avoid inconsistencies */
925 /* XXX: only flush what is necessary */
926 tb_flush(cpu);
931 void cpu_abort(CPUState *cpu, const char *fmt, ...)
933 va_list ap;
934 va_list ap2;
936 va_start(ap, fmt);
937 va_copy(ap2, ap);
938 fprintf(stderr, "qemu: fatal: ");
939 vfprintf(stderr, fmt, ap);
940 fprintf(stderr, "\n");
941 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
942 if (qemu_log_separate()) {
943 qemu_log("qemu: fatal: ");
944 qemu_log_vprintf(fmt, ap2);
945 qemu_log("\n");
946 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
947 qemu_log_flush();
948 qemu_log_close();
950 va_end(ap2);
951 va_end(ap);
952 replay_finish();
953 #if defined(CONFIG_USER_ONLY)
955 struct sigaction act;
956 sigfillset(&act.sa_mask);
957 act.sa_handler = SIG_DFL;
958 sigaction(SIGABRT, &act, NULL);
960 #endif
961 abort();
964 #if !defined(CONFIG_USER_ONLY)
965 /* Called from RCU critical section */
966 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
968 RAMBlock *block;
970 block = atomic_rcu_read(&ram_list.mru_block);
971 if (block && addr - block->offset < block->max_length) {
972 return block;
974 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
975 if (addr - block->offset < block->max_length) {
976 goto found;
980 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
981 abort();
983 found:
984 /* It is safe to write mru_block outside the iothread lock. This
985 * is what happens:
987 * mru_block = xxx
988 * rcu_read_unlock()
989 * xxx removed from list
990 * rcu_read_lock()
991 * read mru_block
992 * mru_block = NULL;
993 * call_rcu(reclaim_ramblock, xxx);
994 * rcu_read_unlock()
996 * atomic_rcu_set is not needed here. The block was already published
997 * when it was placed into the list. Here we're just making an extra
998 * copy of the pointer.
1000 ram_list.mru_block = block;
1001 return block;
1004 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
1006 CPUState *cpu;
1007 ram_addr_t start1;
1008 RAMBlock *block;
1009 ram_addr_t end;
1011 end = TARGET_PAGE_ALIGN(start + length);
1012 start &= TARGET_PAGE_MASK;
1014 rcu_read_lock();
1015 block = qemu_get_ram_block(start);
1016 assert(block == qemu_get_ram_block(end - 1));
1017 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
1018 CPU_FOREACH(cpu) {
1019 tlb_reset_dirty(cpu, start1, length);
1021 rcu_read_unlock();
1024 /* Note: start and end must be within the same ram block. */
1025 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1026 ram_addr_t length,
1027 unsigned client)
1029 DirtyMemoryBlocks *blocks;
1030 unsigned long end, page;
1031 bool dirty = false;
1033 if (length == 0) {
1034 return false;
1037 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1038 page = start >> TARGET_PAGE_BITS;
1040 rcu_read_lock();
1042 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1044 while (page < end) {
1045 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1046 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1047 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1049 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1050 offset, num);
1051 page += num;
1054 rcu_read_unlock();
1056 if (dirty && tcg_enabled()) {
1057 tlb_reset_dirty_range_all(start, length);
1060 return dirty;
1063 /* Called from RCU critical section */
1064 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1065 MemoryRegionSection *section,
1066 target_ulong vaddr,
1067 hwaddr paddr, hwaddr xlat,
1068 int prot,
1069 target_ulong *address)
1071 hwaddr iotlb;
1072 CPUWatchpoint *wp;
1074 if (memory_region_is_ram(section->mr)) {
1075 /* Normal RAM. */
1076 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1077 if (!section->readonly) {
1078 iotlb |= PHYS_SECTION_NOTDIRTY;
1079 } else {
1080 iotlb |= PHYS_SECTION_ROM;
1082 } else {
1083 AddressSpaceDispatch *d;
1085 d = atomic_rcu_read(&section->address_space->dispatch);
1086 iotlb = section - d->map.sections;
1087 iotlb += xlat;
1090 /* Make accesses to pages with watchpoints go via the
1091 watchpoint trap routines. */
1092 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1093 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1094 /* Avoid trapping reads of pages with a write breakpoint. */
1095 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1096 iotlb = PHYS_SECTION_WATCH + paddr;
1097 *address |= TLB_MMIO;
1098 break;
1103 return iotlb;
1105 #endif /* !defined(CONFIG_USER_ONLY) */
1107 #if !defined(CONFIG_USER_ONLY)
1109 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1110 uint16_t section);
1111 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1113 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1114 qemu_anon_ram_alloc;
1117 * Set a custom physical guest memory alloator.
1118 * Accelerators with unusual needs may need this. Hopefully, we can
1119 * get rid of it eventually.
1121 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1123 phys_mem_alloc = alloc;
1126 static uint16_t phys_section_add(PhysPageMap *map,
1127 MemoryRegionSection *section)
1129 /* The physical section number is ORed with a page-aligned
1130 * pointer to produce the iotlb entries. Thus it should
1131 * never overflow into the page-aligned value.
1133 assert(map->sections_nb < TARGET_PAGE_SIZE);
1135 if (map->sections_nb == map->sections_nb_alloc) {
1136 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1137 map->sections = g_renew(MemoryRegionSection, map->sections,
1138 map->sections_nb_alloc);
1140 map->sections[map->sections_nb] = *section;
1141 memory_region_ref(section->mr);
1142 return map->sections_nb++;
1145 static void phys_section_destroy(MemoryRegion *mr)
1147 bool have_sub_page = mr->subpage;
1149 memory_region_unref(mr);
1151 if (have_sub_page) {
1152 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1153 object_unref(OBJECT(&subpage->iomem));
1154 g_free(subpage);
1158 static void phys_sections_free(PhysPageMap *map)
1160 while (map->sections_nb > 0) {
1161 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1162 phys_section_destroy(section->mr);
1164 g_free(map->sections);
1165 g_free(map->nodes);
1168 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1170 subpage_t *subpage;
1171 hwaddr base = section->offset_within_address_space
1172 & TARGET_PAGE_MASK;
1173 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1174 d->map.nodes, d->map.sections);
1175 MemoryRegionSection subsection = {
1176 .offset_within_address_space = base,
1177 .size = int128_make64(TARGET_PAGE_SIZE),
1179 hwaddr start, end;
1181 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1183 if (!(existing->mr->subpage)) {
1184 subpage = subpage_init(d->as, base);
1185 subsection.address_space = d->as;
1186 subsection.mr = &subpage->iomem;
1187 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1188 phys_section_add(&d->map, &subsection));
1189 } else {
1190 subpage = container_of(existing->mr, subpage_t, iomem);
1192 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1193 end = start + int128_get64(section->size) - 1;
1194 subpage_register(subpage, start, end,
1195 phys_section_add(&d->map, section));
1199 static void register_multipage(AddressSpaceDispatch *d,
1200 MemoryRegionSection *section)
1202 hwaddr start_addr = section->offset_within_address_space;
1203 uint16_t section_index = phys_section_add(&d->map, section);
1204 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1205 TARGET_PAGE_BITS));
1207 assert(num_pages);
1208 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1211 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1213 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1214 AddressSpaceDispatch *d = as->next_dispatch;
1215 MemoryRegionSection now = *section, remain = *section;
1216 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1218 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1219 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1220 - now.offset_within_address_space;
1222 now.size = int128_min(int128_make64(left), now.size);
1223 register_subpage(d, &now);
1224 } else {
1225 now.size = int128_zero();
1227 while (int128_ne(remain.size, now.size)) {
1228 remain.size = int128_sub(remain.size, now.size);
1229 remain.offset_within_address_space += int128_get64(now.size);
1230 remain.offset_within_region += int128_get64(now.size);
1231 now = remain;
1232 if (int128_lt(remain.size, page_size)) {
1233 register_subpage(d, &now);
1234 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1235 now.size = page_size;
1236 register_subpage(d, &now);
1237 } else {
1238 now.size = int128_and(now.size, int128_neg(page_size));
1239 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1250 void qemu_mutex_lock_ramlist(void)
1252 qemu_mutex_lock(&ram_list.mutex);
1255 void qemu_mutex_unlock_ramlist(void)
1257 qemu_mutex_unlock(&ram_list.mutex);
1260 #ifdef __linux__
1261 static void *file_ram_alloc(RAMBlock *block,
1262 ram_addr_t memory,
1263 const char *path,
1264 Error **errp)
1266 bool unlink_on_error = false;
1267 char *filename;
1268 char *sanitized_name;
1269 char *c;
1270 void *area;
1271 int fd = -1;
1272 int64_t page_size;
1274 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1275 error_setg(errp,
1276 "host lacks kvm mmu notifiers, -mem-path unsupported");
1277 return NULL;
1280 for (;;) {
1281 fd = open(path, O_RDWR);
1282 if (fd >= 0) {
1283 /* @path names an existing file, use it */
1284 break;
1286 if (errno == ENOENT) {
1287 /* @path names a file that doesn't exist, create it */
1288 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1289 if (fd >= 0) {
1290 unlink_on_error = true;
1291 break;
1293 } else if (errno == EISDIR) {
1294 /* @path names a directory, create a file there */
1295 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1296 sanitized_name = g_strdup(memory_region_name(block->mr));
1297 for (c = sanitized_name; *c != '\0'; c++) {
1298 if (*c == '/') {
1299 *c = '_';
1303 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1304 sanitized_name);
1305 g_free(sanitized_name);
1307 fd = mkstemp(filename);
1308 if (fd >= 0) {
1309 unlink(filename);
1310 g_free(filename);
1311 break;
1313 g_free(filename);
1315 if (errno != EEXIST && errno != EINTR) {
1316 error_setg_errno(errp, errno,
1317 "can't open backing store %s for guest RAM",
1318 path);
1319 goto error;
1322 * Try again on EINTR and EEXIST. The latter happens when
1323 * something else creates the file between our two open().
1327 page_size = qemu_fd_getpagesize(fd);
1328 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1330 if (memory < page_size) {
1331 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1332 "or larger than page size 0x%" PRIx64,
1333 memory, page_size);
1334 goto error;
1337 memory = ROUND_UP(memory, page_size);
1340 * ftruncate is not supported by hugetlbfs in older
1341 * hosts, so don't bother bailing out on errors.
1342 * If anything goes wrong with it under other filesystems,
1343 * mmap will fail.
1345 if (ftruncate(fd, memory)) {
1346 perror("ftruncate");
1349 area = qemu_ram_mmap(fd, memory, block->mr->align,
1350 block->flags & RAM_SHARED);
1351 if (area == MAP_FAILED) {
1352 error_setg_errno(errp, errno,
1353 "unable to map backing store for guest RAM");
1354 goto error;
1357 if (mem_prealloc) {
1358 os_mem_prealloc(fd, area, memory);
1361 block->fd = fd;
1362 return area;
1364 error:
1365 if (unlink_on_error) {
1366 unlink(path);
1368 if (fd != -1) {
1369 close(fd);
1371 return NULL;
1373 #endif
1375 /* Called with the ramlist lock held. */
1376 static ram_addr_t find_ram_offset(ram_addr_t size)
1378 RAMBlock *block, *next_block;
1379 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1381 assert(size != 0); /* it would hand out same offset multiple times */
1383 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1384 return 0;
1387 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1388 ram_addr_t end, next = RAM_ADDR_MAX;
1390 end = block->offset + block->max_length;
1392 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1393 if (next_block->offset >= end) {
1394 next = MIN(next, next_block->offset);
1397 if (next - end >= size && next - end < mingap) {
1398 offset = end;
1399 mingap = next - end;
1403 if (offset == RAM_ADDR_MAX) {
1404 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1405 (uint64_t)size);
1406 abort();
1409 return offset;
1412 ram_addr_t last_ram_offset(void)
1414 RAMBlock *block;
1415 ram_addr_t last = 0;
1417 rcu_read_lock();
1418 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1419 last = MAX(last, block->offset + block->max_length);
1421 rcu_read_unlock();
1422 return last;
1425 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1427 int ret;
1429 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1430 if (!machine_dump_guest_core(current_machine)) {
1431 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1432 if (ret) {
1433 perror("qemu_madvise");
1434 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1435 "but dump_guest_core=off specified\n");
1440 const char *qemu_ram_get_idstr(RAMBlock *rb)
1442 return rb->idstr;
1445 /* Called with iothread lock held. */
1446 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1448 RAMBlock *block;
1450 assert(new_block);
1451 assert(!new_block->idstr[0]);
1453 if (dev) {
1454 char *id = qdev_get_dev_path(dev);
1455 if (id) {
1456 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1457 g_free(id);
1460 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1462 rcu_read_lock();
1463 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1464 if (block != new_block &&
1465 !strcmp(block->idstr, new_block->idstr)) {
1466 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1467 new_block->idstr);
1468 abort();
1471 rcu_read_unlock();
1474 /* Called with iothread lock held. */
1475 void qemu_ram_unset_idstr(RAMBlock *block)
1477 /* FIXME: arch_init.c assumes that this is not called throughout
1478 * migration. Ignore the problem since hot-unplug during migration
1479 * does not work anyway.
1481 if (block) {
1482 memset(block->idstr, 0, sizeof(block->idstr));
1486 static int memory_try_enable_merging(void *addr, size_t len)
1488 if (!machine_mem_merge(current_machine)) {
1489 /* disabled by the user */
1490 return 0;
1493 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1496 /* Only legal before guest might have detected the memory size: e.g. on
1497 * incoming migration, or right after reset.
1499 * As memory core doesn't know how is memory accessed, it is up to
1500 * resize callback to update device state and/or add assertions to detect
1501 * misuse, if necessary.
1503 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1505 assert(block);
1507 newsize = HOST_PAGE_ALIGN(newsize);
1509 if (block->used_length == newsize) {
1510 return 0;
1513 if (!(block->flags & RAM_RESIZEABLE)) {
1514 error_setg_errno(errp, EINVAL,
1515 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1516 " in != 0x" RAM_ADDR_FMT, block->idstr,
1517 newsize, block->used_length);
1518 return -EINVAL;
1521 if (block->max_length < newsize) {
1522 error_setg_errno(errp, EINVAL,
1523 "Length too large: %s: 0x" RAM_ADDR_FMT
1524 " > 0x" RAM_ADDR_FMT, block->idstr,
1525 newsize, block->max_length);
1526 return -EINVAL;
1529 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1530 block->used_length = newsize;
1531 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1532 DIRTY_CLIENTS_ALL);
1533 memory_region_set_size(block->mr, newsize);
1534 if (block->resized) {
1535 block->resized(block->idstr, newsize, block->host);
1537 return 0;
1540 /* Called with ram_list.mutex held */
1541 static void dirty_memory_extend(ram_addr_t old_ram_size,
1542 ram_addr_t new_ram_size)
1544 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1545 DIRTY_MEMORY_BLOCK_SIZE);
1546 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1547 DIRTY_MEMORY_BLOCK_SIZE);
1548 int i;
1550 /* Only need to extend if block count increased */
1551 if (new_num_blocks <= old_num_blocks) {
1552 return;
1555 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1556 DirtyMemoryBlocks *old_blocks;
1557 DirtyMemoryBlocks *new_blocks;
1558 int j;
1560 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1561 new_blocks = g_malloc(sizeof(*new_blocks) +
1562 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1564 if (old_num_blocks) {
1565 memcpy(new_blocks->blocks, old_blocks->blocks,
1566 old_num_blocks * sizeof(old_blocks->blocks[0]));
1569 for (j = old_num_blocks; j < new_num_blocks; j++) {
1570 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1573 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1575 if (old_blocks) {
1576 g_free_rcu(old_blocks, rcu);
1581 static void ram_block_add(RAMBlock *new_block, Error **errp)
1583 RAMBlock *block;
1584 RAMBlock *last_block = NULL;
1585 ram_addr_t old_ram_size, new_ram_size;
1586 Error *err = NULL;
1588 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1590 qemu_mutex_lock_ramlist();
1591 new_block->offset = find_ram_offset(new_block->max_length);
1593 if (!new_block->host) {
1594 if (xen_enabled()) {
1595 xen_ram_alloc(new_block->offset, new_block->max_length,
1596 new_block->mr, &err);
1597 if (err) {
1598 error_propagate(errp, err);
1599 qemu_mutex_unlock_ramlist();
1600 return;
1602 } else {
1603 new_block->host = phys_mem_alloc(new_block->max_length,
1604 &new_block->mr->align);
1605 if (!new_block->host) {
1606 error_setg_errno(errp, errno,
1607 "cannot set up guest memory '%s'",
1608 memory_region_name(new_block->mr));
1609 qemu_mutex_unlock_ramlist();
1610 return;
1612 memory_try_enable_merging(new_block->host, new_block->max_length);
1616 new_ram_size = MAX(old_ram_size,
1617 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1618 if (new_ram_size > old_ram_size) {
1619 migration_bitmap_extend(old_ram_size, new_ram_size);
1620 dirty_memory_extend(old_ram_size, new_ram_size);
1622 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1623 * QLIST (which has an RCU-friendly variant) does not have insertion at
1624 * tail, so save the last element in last_block.
1626 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1627 last_block = block;
1628 if (block->max_length < new_block->max_length) {
1629 break;
1632 if (block) {
1633 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1634 } else if (last_block) {
1635 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1636 } else { /* list is empty */
1637 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1639 ram_list.mru_block = NULL;
1641 /* Write list before version */
1642 smp_wmb();
1643 ram_list.version++;
1644 qemu_mutex_unlock_ramlist();
1646 cpu_physical_memory_set_dirty_range(new_block->offset,
1647 new_block->used_length,
1648 DIRTY_CLIENTS_ALL);
1650 if (new_block->host) {
1651 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1652 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1653 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1654 if (kvm_enabled()) {
1655 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1660 #ifdef __linux__
1661 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1662 bool share, const char *mem_path,
1663 Error **errp)
1665 RAMBlock *new_block;
1666 Error *local_err = NULL;
1668 if (xen_enabled()) {
1669 error_setg(errp, "-mem-path not supported with Xen");
1670 return NULL;
1673 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1675 * file_ram_alloc() needs to allocate just like
1676 * phys_mem_alloc, but we haven't bothered to provide
1677 * a hook there.
1679 error_setg(errp,
1680 "-mem-path not supported with this accelerator");
1681 return NULL;
1684 size = HOST_PAGE_ALIGN(size);
1685 new_block = g_malloc0(sizeof(*new_block));
1686 new_block->mr = mr;
1687 new_block->used_length = size;
1688 new_block->max_length = size;
1689 new_block->flags = share ? RAM_SHARED : 0;
1690 new_block->host = file_ram_alloc(new_block, size,
1691 mem_path, errp);
1692 if (!new_block->host) {
1693 g_free(new_block);
1694 return NULL;
1697 ram_block_add(new_block, &local_err);
1698 if (local_err) {
1699 g_free(new_block);
1700 error_propagate(errp, local_err);
1701 return NULL;
1703 return new_block;
1705 #endif
1707 static
1708 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1709 void (*resized)(const char*,
1710 uint64_t length,
1711 void *host),
1712 void *host, bool resizeable,
1713 MemoryRegion *mr, Error **errp)
1715 RAMBlock *new_block;
1716 Error *local_err = NULL;
1718 size = HOST_PAGE_ALIGN(size);
1719 max_size = HOST_PAGE_ALIGN(max_size);
1720 new_block = g_malloc0(sizeof(*new_block));
1721 new_block->mr = mr;
1722 new_block->resized = resized;
1723 new_block->used_length = size;
1724 new_block->max_length = max_size;
1725 assert(max_size >= size);
1726 new_block->fd = -1;
1727 new_block->host = host;
1728 if (host) {
1729 new_block->flags |= RAM_PREALLOC;
1731 if (resizeable) {
1732 new_block->flags |= RAM_RESIZEABLE;
1734 ram_block_add(new_block, &local_err);
1735 if (local_err) {
1736 g_free(new_block);
1737 error_propagate(errp, local_err);
1738 return NULL;
1740 return new_block;
1743 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1744 MemoryRegion *mr, Error **errp)
1746 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1749 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1751 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1754 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1755 void (*resized)(const char*,
1756 uint64_t length,
1757 void *host),
1758 MemoryRegion *mr, Error **errp)
1760 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1763 static void reclaim_ramblock(RAMBlock *block)
1765 if (block->flags & RAM_PREALLOC) {
1767 } else if (xen_enabled()) {
1768 xen_invalidate_map_cache_entry(block->host);
1769 #ifndef _WIN32
1770 } else if (block->fd >= 0) {
1771 qemu_ram_munmap(block->host, block->max_length);
1772 close(block->fd);
1773 #endif
1774 } else {
1775 qemu_anon_ram_free(block->host, block->max_length);
1777 g_free(block);
1780 void qemu_ram_free(RAMBlock *block)
1782 if (!block) {
1783 return;
1786 qemu_mutex_lock_ramlist();
1787 QLIST_REMOVE_RCU(block, next);
1788 ram_list.mru_block = NULL;
1789 /* Write list before version */
1790 smp_wmb();
1791 ram_list.version++;
1792 call_rcu(block, reclaim_ramblock, rcu);
1793 qemu_mutex_unlock_ramlist();
1796 #ifndef _WIN32
1797 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1799 RAMBlock *block;
1800 ram_addr_t offset;
1801 int flags;
1802 void *area, *vaddr;
1804 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1805 offset = addr - block->offset;
1806 if (offset < block->max_length) {
1807 vaddr = ramblock_ptr(block, offset);
1808 if (block->flags & RAM_PREALLOC) {
1810 } else if (xen_enabled()) {
1811 abort();
1812 } else {
1813 flags = MAP_FIXED;
1814 if (block->fd >= 0) {
1815 flags |= (block->flags & RAM_SHARED ?
1816 MAP_SHARED : MAP_PRIVATE);
1817 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1818 flags, block->fd, offset);
1819 } else {
1821 * Remap needs to match alloc. Accelerators that
1822 * set phys_mem_alloc never remap. If they did,
1823 * we'd need a remap hook here.
1825 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1827 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1828 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1829 flags, -1, 0);
1831 if (area != vaddr) {
1832 fprintf(stderr, "Could not remap addr: "
1833 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1834 length, addr);
1835 exit(1);
1837 memory_try_enable_merging(vaddr, length);
1838 qemu_ram_setup_dump(vaddr, length);
1843 #endif /* !_WIN32 */
1845 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1846 * This should not be used for general purpose DMA. Use address_space_map
1847 * or address_space_rw instead. For local memory (e.g. video ram) that the
1848 * device owns, use memory_region_get_ram_ptr.
1850 * Called within RCU critical section.
1852 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1854 RAMBlock *block = ram_block;
1856 if (block == NULL) {
1857 block = qemu_get_ram_block(addr);
1858 addr -= block->offset;
1861 if (xen_enabled() && block->host == NULL) {
1862 /* We need to check if the requested address is in the RAM
1863 * because we don't want to map the entire memory in QEMU.
1864 * In that case just map until the end of the page.
1866 if (block->offset == 0) {
1867 return xen_map_cache(addr, 0, 0);
1870 block->host = xen_map_cache(block->offset, block->max_length, 1);
1872 return ramblock_ptr(block, addr);
1875 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1876 * but takes a size argument.
1878 * Called within RCU critical section.
1880 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1881 hwaddr *size)
1883 RAMBlock *block = ram_block;
1884 if (*size == 0) {
1885 return NULL;
1888 if (block == NULL) {
1889 block = qemu_get_ram_block(addr);
1890 addr -= block->offset;
1892 *size = MIN(*size, block->max_length - addr);
1894 if (xen_enabled() && block->host == NULL) {
1895 /* We need to check if the requested address is in the RAM
1896 * because we don't want to map the entire memory in QEMU.
1897 * In that case just map the requested area.
1899 if (block->offset == 0) {
1900 return xen_map_cache(addr, *size, 1);
1903 block->host = xen_map_cache(block->offset, block->max_length, 1);
1906 return ramblock_ptr(block, addr);
1910 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1911 * in that RAMBlock.
1913 * ptr: Host pointer to look up
1914 * round_offset: If true round the result offset down to a page boundary
1915 * *ram_addr: set to result ram_addr
1916 * *offset: set to result offset within the RAMBlock
1918 * Returns: RAMBlock (or NULL if not found)
1920 * By the time this function returns, the returned pointer is not protected
1921 * by RCU anymore. If the caller is not within an RCU critical section and
1922 * does not hold the iothread lock, it must have other means of protecting the
1923 * pointer, such as a reference to the region that includes the incoming
1924 * ram_addr_t.
1926 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1927 ram_addr_t *offset)
1929 RAMBlock *block;
1930 uint8_t *host = ptr;
1932 if (xen_enabled()) {
1933 ram_addr_t ram_addr;
1934 rcu_read_lock();
1935 ram_addr = xen_ram_addr_from_mapcache(ptr);
1936 block = qemu_get_ram_block(ram_addr);
1937 if (block) {
1938 *offset = (host - block->host);
1940 rcu_read_unlock();
1941 return block;
1944 rcu_read_lock();
1945 block = atomic_rcu_read(&ram_list.mru_block);
1946 if (block && block->host && host - block->host < block->max_length) {
1947 goto found;
1950 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1951 /* This case append when the block is not mapped. */
1952 if (block->host == NULL) {
1953 continue;
1955 if (host - block->host < block->max_length) {
1956 goto found;
1960 rcu_read_unlock();
1961 return NULL;
1963 found:
1964 *offset = (host - block->host);
1965 if (round_offset) {
1966 *offset &= TARGET_PAGE_MASK;
1968 rcu_read_unlock();
1969 return block;
1973 * Finds the named RAMBlock
1975 * name: The name of RAMBlock to find
1977 * Returns: RAMBlock (or NULL if not found)
1979 RAMBlock *qemu_ram_block_by_name(const char *name)
1981 RAMBlock *block;
1983 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1984 if (!strcmp(name, block->idstr)) {
1985 return block;
1989 return NULL;
1992 /* Some of the softmmu routines need to translate from a host pointer
1993 (typically a TLB entry) back to a ram offset. */
1994 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1996 RAMBlock *block;
1997 ram_addr_t offset;
1999 block = qemu_ram_block_from_host(ptr, false, &offset);
2000 if (!block) {
2001 return RAM_ADDR_INVALID;
2004 return block->offset + offset;
2007 /* Called within RCU critical section. */
2008 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2009 uint64_t val, unsigned size)
2011 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2012 tb_invalidate_phys_page_fast(ram_addr, size);
2014 switch (size) {
2015 case 1:
2016 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2017 break;
2018 case 2:
2019 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2020 break;
2021 case 4:
2022 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2023 break;
2024 default:
2025 abort();
2027 /* Set both VGA and migration bits for simplicity and to remove
2028 * the notdirty callback faster.
2030 cpu_physical_memory_set_dirty_range(ram_addr, size,
2031 DIRTY_CLIENTS_NOCODE);
2032 /* we remove the notdirty callback only if the code has been
2033 flushed */
2034 if (!cpu_physical_memory_is_clean(ram_addr)) {
2035 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2039 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2040 unsigned size, bool is_write)
2042 return is_write;
2045 static const MemoryRegionOps notdirty_mem_ops = {
2046 .write = notdirty_mem_write,
2047 .valid.accepts = notdirty_mem_accepts,
2048 .endianness = DEVICE_NATIVE_ENDIAN,
2051 /* Generate a debug exception if a watchpoint has been hit. */
2052 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2054 CPUState *cpu = current_cpu;
2055 CPUClass *cc = CPU_GET_CLASS(cpu);
2056 CPUArchState *env = cpu->env_ptr;
2057 target_ulong pc, cs_base;
2058 target_ulong vaddr;
2059 CPUWatchpoint *wp;
2060 uint32_t cpu_flags;
2062 if (cpu->watchpoint_hit) {
2063 /* We re-entered the check after replacing the TB. Now raise
2064 * the debug interrupt so that is will trigger after the
2065 * current instruction. */
2066 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2067 return;
2069 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2070 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2071 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2072 && (wp->flags & flags)) {
2073 if (flags == BP_MEM_READ) {
2074 wp->flags |= BP_WATCHPOINT_HIT_READ;
2075 } else {
2076 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2078 wp->hitaddr = vaddr;
2079 wp->hitattrs = attrs;
2080 if (!cpu->watchpoint_hit) {
2081 if (wp->flags & BP_CPU &&
2082 !cc->debug_check_watchpoint(cpu, wp)) {
2083 wp->flags &= ~BP_WATCHPOINT_HIT;
2084 continue;
2086 cpu->watchpoint_hit = wp;
2087 tb_check_watchpoint(cpu);
2088 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2089 cpu->exception_index = EXCP_DEBUG;
2090 cpu_loop_exit(cpu);
2091 } else {
2092 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2093 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2094 cpu_resume_from_signal(cpu, NULL);
2097 } else {
2098 wp->flags &= ~BP_WATCHPOINT_HIT;
2103 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2104 so these check for a hit then pass through to the normal out-of-line
2105 phys routines. */
2106 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2107 unsigned size, MemTxAttrs attrs)
2109 MemTxResult res;
2110 uint64_t data;
2111 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2112 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2114 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2115 switch (size) {
2116 case 1:
2117 data = address_space_ldub(as, addr, attrs, &res);
2118 break;
2119 case 2:
2120 data = address_space_lduw(as, addr, attrs, &res);
2121 break;
2122 case 4:
2123 data = address_space_ldl(as, addr, attrs, &res);
2124 break;
2125 default: abort();
2127 *pdata = data;
2128 return res;
2131 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2132 uint64_t val, unsigned size,
2133 MemTxAttrs attrs)
2135 MemTxResult res;
2136 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2137 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2139 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2140 switch (size) {
2141 case 1:
2142 address_space_stb(as, addr, val, attrs, &res);
2143 break;
2144 case 2:
2145 address_space_stw(as, addr, val, attrs, &res);
2146 break;
2147 case 4:
2148 address_space_stl(as, addr, val, attrs, &res);
2149 break;
2150 default: abort();
2152 return res;
2155 static const MemoryRegionOps watch_mem_ops = {
2156 .read_with_attrs = watch_mem_read,
2157 .write_with_attrs = watch_mem_write,
2158 .endianness = DEVICE_NATIVE_ENDIAN,
2161 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2162 unsigned len, MemTxAttrs attrs)
2164 subpage_t *subpage = opaque;
2165 uint8_t buf[8];
2166 MemTxResult res;
2168 #if defined(DEBUG_SUBPAGE)
2169 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2170 subpage, len, addr);
2171 #endif
2172 res = address_space_read(subpage->as, addr + subpage->base,
2173 attrs, buf, len);
2174 if (res) {
2175 return res;
2177 switch (len) {
2178 case 1:
2179 *data = ldub_p(buf);
2180 return MEMTX_OK;
2181 case 2:
2182 *data = lduw_p(buf);
2183 return MEMTX_OK;
2184 case 4:
2185 *data = ldl_p(buf);
2186 return MEMTX_OK;
2187 case 8:
2188 *data = ldq_p(buf);
2189 return MEMTX_OK;
2190 default:
2191 abort();
2195 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2196 uint64_t value, unsigned len, MemTxAttrs attrs)
2198 subpage_t *subpage = opaque;
2199 uint8_t buf[8];
2201 #if defined(DEBUG_SUBPAGE)
2202 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2203 " value %"PRIx64"\n",
2204 __func__, subpage, len, addr, value);
2205 #endif
2206 switch (len) {
2207 case 1:
2208 stb_p(buf, value);
2209 break;
2210 case 2:
2211 stw_p(buf, value);
2212 break;
2213 case 4:
2214 stl_p(buf, value);
2215 break;
2216 case 8:
2217 stq_p(buf, value);
2218 break;
2219 default:
2220 abort();
2222 return address_space_write(subpage->as, addr + subpage->base,
2223 attrs, buf, len);
2226 static bool subpage_accepts(void *opaque, hwaddr addr,
2227 unsigned len, bool is_write)
2229 subpage_t *subpage = opaque;
2230 #if defined(DEBUG_SUBPAGE)
2231 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2232 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2233 #endif
2235 return address_space_access_valid(subpage->as, addr + subpage->base,
2236 len, is_write);
2239 static const MemoryRegionOps subpage_ops = {
2240 .read_with_attrs = subpage_read,
2241 .write_with_attrs = subpage_write,
2242 .impl.min_access_size = 1,
2243 .impl.max_access_size = 8,
2244 .valid.min_access_size = 1,
2245 .valid.max_access_size = 8,
2246 .valid.accepts = subpage_accepts,
2247 .endianness = DEVICE_NATIVE_ENDIAN,
2250 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2251 uint16_t section)
2253 int idx, eidx;
2255 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2256 return -1;
2257 idx = SUBPAGE_IDX(start);
2258 eidx = SUBPAGE_IDX(end);
2259 #if defined(DEBUG_SUBPAGE)
2260 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2261 __func__, mmio, start, end, idx, eidx, section);
2262 #endif
2263 for (; idx <= eidx; idx++) {
2264 mmio->sub_section[idx] = section;
2267 return 0;
2270 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2272 subpage_t *mmio;
2274 mmio = g_malloc0(sizeof(subpage_t));
2276 mmio->as = as;
2277 mmio->base = base;
2278 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2279 NULL, TARGET_PAGE_SIZE);
2280 mmio->iomem.subpage = true;
2281 #if defined(DEBUG_SUBPAGE)
2282 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2283 mmio, base, TARGET_PAGE_SIZE);
2284 #endif
2285 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2287 return mmio;
2290 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2291 MemoryRegion *mr)
2293 assert(as);
2294 MemoryRegionSection section = {
2295 .address_space = as,
2296 .mr = mr,
2297 .offset_within_address_space = 0,
2298 .offset_within_region = 0,
2299 .size = int128_2_64(),
2302 return phys_section_add(map, &section);
2305 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2307 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2308 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2309 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2310 MemoryRegionSection *sections = d->map.sections;
2312 return sections[index & ~TARGET_PAGE_MASK].mr;
2315 static void io_mem_init(void)
2317 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2318 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2319 NULL, UINT64_MAX);
2320 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2321 NULL, UINT64_MAX);
2322 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2323 NULL, UINT64_MAX);
2326 static void mem_begin(MemoryListener *listener)
2328 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2329 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2330 uint16_t n;
2332 n = dummy_section(&d->map, as, &io_mem_unassigned);
2333 assert(n == PHYS_SECTION_UNASSIGNED);
2334 n = dummy_section(&d->map, as, &io_mem_notdirty);
2335 assert(n == PHYS_SECTION_NOTDIRTY);
2336 n = dummy_section(&d->map, as, &io_mem_rom);
2337 assert(n == PHYS_SECTION_ROM);
2338 n = dummy_section(&d->map, as, &io_mem_watch);
2339 assert(n == PHYS_SECTION_WATCH);
2341 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2342 d->as = as;
2343 as->next_dispatch = d;
2346 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2348 phys_sections_free(&d->map);
2349 g_free(d);
2352 static void mem_commit(MemoryListener *listener)
2354 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2355 AddressSpaceDispatch *cur = as->dispatch;
2356 AddressSpaceDispatch *next = as->next_dispatch;
2358 phys_page_compact_all(next, next->map.nodes_nb);
2360 atomic_rcu_set(&as->dispatch, next);
2361 if (cur) {
2362 call_rcu(cur, address_space_dispatch_free, rcu);
2366 static void tcg_commit(MemoryListener *listener)
2368 CPUAddressSpace *cpuas;
2369 AddressSpaceDispatch *d;
2371 /* since each CPU stores ram addresses in its TLB cache, we must
2372 reset the modified entries */
2373 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2374 cpu_reloading_memory_map();
2375 /* The CPU and TLB are protected by the iothread lock.
2376 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2377 * may have split the RCU critical section.
2379 d = atomic_rcu_read(&cpuas->as->dispatch);
2380 cpuas->memory_dispatch = d;
2381 tlb_flush(cpuas->cpu, 1);
2384 void address_space_init_dispatch(AddressSpace *as)
2386 as->dispatch = NULL;
2387 as->dispatch_listener = (MemoryListener) {
2388 .begin = mem_begin,
2389 .commit = mem_commit,
2390 .region_add = mem_add,
2391 .region_nop = mem_add,
2392 .priority = 0,
2394 memory_listener_register(&as->dispatch_listener, as);
2397 void address_space_unregister(AddressSpace *as)
2399 memory_listener_unregister(&as->dispatch_listener);
2402 void address_space_destroy_dispatch(AddressSpace *as)
2404 AddressSpaceDispatch *d = as->dispatch;
2406 atomic_rcu_set(&as->dispatch, NULL);
2407 if (d) {
2408 call_rcu(d, address_space_dispatch_free, rcu);
2412 static void memory_map_init(void)
2414 system_memory = g_malloc(sizeof(*system_memory));
2416 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2417 address_space_init(&address_space_memory, system_memory, "memory");
2419 system_io = g_malloc(sizeof(*system_io));
2420 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2421 65536);
2422 address_space_init(&address_space_io, system_io, "I/O");
2425 MemoryRegion *get_system_memory(void)
2427 return system_memory;
2430 MemoryRegion *get_system_io(void)
2432 return system_io;
2435 #endif /* !defined(CONFIG_USER_ONLY) */
2437 /* physical memory access (slow version, mainly for debug) */
2438 #if defined(CONFIG_USER_ONLY)
2439 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2440 uint8_t *buf, int len, int is_write)
2442 int l, flags;
2443 target_ulong page;
2444 void * p;
2446 while (len > 0) {
2447 page = addr & TARGET_PAGE_MASK;
2448 l = (page + TARGET_PAGE_SIZE) - addr;
2449 if (l > len)
2450 l = len;
2451 flags = page_get_flags(page);
2452 if (!(flags & PAGE_VALID))
2453 return -1;
2454 if (is_write) {
2455 if (!(flags & PAGE_WRITE))
2456 return -1;
2457 /* XXX: this code should not depend on lock_user */
2458 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2459 return -1;
2460 memcpy(p, buf, l);
2461 unlock_user(p, addr, l);
2462 } else {
2463 if (!(flags & PAGE_READ))
2464 return -1;
2465 /* XXX: this code should not depend on lock_user */
2466 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2467 return -1;
2468 memcpy(buf, p, l);
2469 unlock_user(p, addr, 0);
2471 len -= l;
2472 buf += l;
2473 addr += l;
2475 return 0;
2478 #else
2480 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2481 hwaddr length)
2483 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2484 addr += memory_region_get_ram_addr(mr);
2486 /* No early return if dirty_log_mask is or becomes 0, because
2487 * cpu_physical_memory_set_dirty_range will still call
2488 * xen_modified_memory.
2490 if (dirty_log_mask) {
2491 dirty_log_mask =
2492 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2494 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2495 tb_invalidate_phys_range(addr, addr + length);
2496 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2498 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2501 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2503 unsigned access_size_max = mr->ops->valid.max_access_size;
2505 /* Regions are assumed to support 1-4 byte accesses unless
2506 otherwise specified. */
2507 if (access_size_max == 0) {
2508 access_size_max = 4;
2511 /* Bound the maximum access by the alignment of the address. */
2512 if (!mr->ops->impl.unaligned) {
2513 unsigned align_size_max = addr & -addr;
2514 if (align_size_max != 0 && align_size_max < access_size_max) {
2515 access_size_max = align_size_max;
2519 /* Don't attempt accesses larger than the maximum. */
2520 if (l > access_size_max) {
2521 l = access_size_max;
2523 l = pow2floor(l);
2525 return l;
2528 static bool prepare_mmio_access(MemoryRegion *mr)
2530 bool unlocked = !qemu_mutex_iothread_locked();
2531 bool release_lock = false;
2533 if (unlocked && mr->global_locking) {
2534 qemu_mutex_lock_iothread();
2535 unlocked = false;
2536 release_lock = true;
2538 if (mr->flush_coalesced_mmio) {
2539 if (unlocked) {
2540 qemu_mutex_lock_iothread();
2542 qemu_flush_coalesced_mmio_buffer();
2543 if (unlocked) {
2544 qemu_mutex_unlock_iothread();
2548 return release_lock;
2551 /* Called within RCU critical section. */
2552 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2553 MemTxAttrs attrs,
2554 const uint8_t *buf,
2555 int len, hwaddr addr1,
2556 hwaddr l, MemoryRegion *mr)
2558 uint8_t *ptr;
2559 uint64_t val;
2560 MemTxResult result = MEMTX_OK;
2561 bool release_lock = false;
2563 for (;;) {
2564 if (!memory_access_is_direct(mr, true)) {
2565 release_lock |= prepare_mmio_access(mr);
2566 l = memory_access_size(mr, l, addr1);
2567 /* XXX: could force current_cpu to NULL to avoid
2568 potential bugs */
2569 switch (l) {
2570 case 8:
2571 /* 64 bit write access */
2572 val = ldq_p(buf);
2573 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2574 attrs);
2575 break;
2576 case 4:
2577 /* 32 bit write access */
2578 val = ldl_p(buf);
2579 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2580 attrs);
2581 break;
2582 case 2:
2583 /* 16 bit write access */
2584 val = lduw_p(buf);
2585 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2586 attrs);
2587 break;
2588 case 1:
2589 /* 8 bit write access */
2590 val = ldub_p(buf);
2591 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2592 attrs);
2593 break;
2594 default:
2595 abort();
2597 } else {
2598 /* RAM case */
2599 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2600 memcpy(ptr, buf, l);
2601 invalidate_and_set_dirty(mr, addr1, l);
2604 if (release_lock) {
2605 qemu_mutex_unlock_iothread();
2606 release_lock = false;
2609 len -= l;
2610 buf += l;
2611 addr += l;
2613 if (!len) {
2614 break;
2617 l = len;
2618 mr = address_space_translate(as, addr, &addr1, &l, true);
2621 return result;
2624 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2625 const uint8_t *buf, int len)
2627 hwaddr l;
2628 hwaddr addr1;
2629 MemoryRegion *mr;
2630 MemTxResult result = MEMTX_OK;
2632 if (len > 0) {
2633 rcu_read_lock();
2634 l = len;
2635 mr = address_space_translate(as, addr, &addr1, &l, true);
2636 result = address_space_write_continue(as, addr, attrs, buf, len,
2637 addr1, l, mr);
2638 rcu_read_unlock();
2641 return result;
2644 /* Called within RCU critical section. */
2645 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2646 MemTxAttrs attrs, uint8_t *buf,
2647 int len, hwaddr addr1, hwaddr l,
2648 MemoryRegion *mr)
2650 uint8_t *ptr;
2651 uint64_t val;
2652 MemTxResult result = MEMTX_OK;
2653 bool release_lock = false;
2655 for (;;) {
2656 if (!memory_access_is_direct(mr, false)) {
2657 /* I/O case */
2658 release_lock |= prepare_mmio_access(mr);
2659 l = memory_access_size(mr, l, addr1);
2660 switch (l) {
2661 case 8:
2662 /* 64 bit read access */
2663 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2664 attrs);
2665 stq_p(buf, val);
2666 break;
2667 case 4:
2668 /* 32 bit read access */
2669 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2670 attrs);
2671 stl_p(buf, val);
2672 break;
2673 case 2:
2674 /* 16 bit read access */
2675 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2676 attrs);
2677 stw_p(buf, val);
2678 break;
2679 case 1:
2680 /* 8 bit read access */
2681 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2682 attrs);
2683 stb_p(buf, val);
2684 break;
2685 default:
2686 abort();
2688 } else {
2689 /* RAM case */
2690 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2691 memcpy(buf, ptr, l);
2694 if (release_lock) {
2695 qemu_mutex_unlock_iothread();
2696 release_lock = false;
2699 len -= l;
2700 buf += l;
2701 addr += l;
2703 if (!len) {
2704 break;
2707 l = len;
2708 mr = address_space_translate(as, addr, &addr1, &l, false);
2711 return result;
2714 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2715 MemTxAttrs attrs, uint8_t *buf, int len)
2717 hwaddr l;
2718 hwaddr addr1;
2719 MemoryRegion *mr;
2720 MemTxResult result = MEMTX_OK;
2722 if (len > 0) {
2723 rcu_read_lock();
2724 l = len;
2725 mr = address_space_translate(as, addr, &addr1, &l, false);
2726 result = address_space_read_continue(as, addr, attrs, buf, len,
2727 addr1, l, mr);
2728 rcu_read_unlock();
2731 return result;
2734 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2735 uint8_t *buf, int len, bool is_write)
2737 if (is_write) {
2738 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2739 } else {
2740 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2744 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2745 int len, int is_write)
2747 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2748 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* memcpy the buffer into the mapping and mark it dirty */
    FLUSH_CACHE,    /* flush_icache_range() over the mapping, no copy */
};
2756 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2757 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2759 hwaddr l;
2760 uint8_t *ptr;
2761 hwaddr addr1;
2762 MemoryRegion *mr;
2764 rcu_read_lock();
2765 while (len > 0) {
2766 l = len;
2767 mr = address_space_translate(as, addr, &addr1, &l, true);
2769 if (!(memory_region_is_ram(mr) ||
2770 memory_region_is_romd(mr))) {
2771 l = memory_access_size(mr, l, addr1);
2772 } else {
2773 /* ROM/RAM case */
2774 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2775 switch (type) {
2776 case WRITE_DATA:
2777 memcpy(ptr, buf, l);
2778 invalidate_and_set_dirty(mr, addr1, l);
2779 break;
2780 case FLUSH_CACHE:
2781 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2782 break;
2785 len -= l;
2786 buf += l;
2787 addr += l;
2789 rcu_read_unlock();
2792 /* used for ROM loading : can write in RAM and ROM */
2793 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2794 const uint8_t *buf, int len)
2796 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2799 void cpu_flush_icache_range(hwaddr start, int len)
2802 * This function should do the same thing as an icache flush that was
2803 * triggered from within the guest. For TCG we are always cache coherent,
2804 * so there is no need to flush anything. For KVM / Xen we need to flush
2805 * the host's instruction cache at least.
2807 if (tcg_enabled()) {
2808 return;
2811 cpu_physical_memory_write_rom_internal(&address_space_memory,
2812 start, NULL, len, FLUSH_CACHE);
2815 typedef struct {
2816 MemoryRegion *mr;
2817 void *buffer;
2818 hwaddr addr;
2819 hwaddr len;
2820 bool in_use;
2821 } BounceBuffer;
2823 static BounceBuffer bounce;
2825 typedef struct MapClient {
2826 QEMUBH *bh;
2827 QLIST_ENTRY(MapClient) link;
2828 } MapClient;
2830 QemuMutex map_client_list_lock;
2831 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2832 = QLIST_HEAD_INITIALIZER(map_client_list);
2834 static void cpu_unregister_map_client_do(MapClient *client)
2836 QLIST_REMOVE(client, link);
2837 g_free(client);
2840 static void cpu_notify_map_clients_locked(void)
2842 MapClient *client;
2844 while (!QLIST_EMPTY(&map_client_list)) {
2845 client = QLIST_FIRST(&map_client_list);
2846 qemu_bh_schedule(client->bh);
2847 cpu_unregister_map_client_do(client);
2851 void cpu_register_map_client(QEMUBH *bh)
2853 MapClient *client = g_malloc(sizeof(*client));
2855 qemu_mutex_lock(&map_client_list_lock);
2856 client->bh = bh;
2857 QLIST_INSERT_HEAD(&map_client_list, client, link);
2858 if (!atomic_read(&bounce.in_use)) {
2859 cpu_notify_map_clients_locked();
2861 qemu_mutex_unlock(&map_client_list_lock);
2864 void cpu_exec_init_all(void)
2866 qemu_mutex_init(&ram_list.mutex);
2867 io_mem_init();
2868 memory_map_init();
2869 qemu_mutex_init(&map_client_list_lock);
2872 void cpu_unregister_map_client(QEMUBH *bh)
2874 MapClient *client;
2876 qemu_mutex_lock(&map_client_list_lock);
2877 QLIST_FOREACH(client, &map_client_list, link) {
2878 if (client->bh == bh) {
2879 cpu_unregister_map_client_do(client);
2880 break;
2883 qemu_mutex_unlock(&map_client_list_lock);
2886 static void cpu_notify_map_clients(void)
2888 qemu_mutex_lock(&map_client_list_lock);
2889 cpu_notify_map_clients_locked();
2890 qemu_mutex_unlock(&map_client_list_lock);
2893 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2895 MemoryRegion *mr;
2896 hwaddr l, xlat;
2898 rcu_read_lock();
2899 while (len > 0) {
2900 l = len;
2901 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2902 if (!memory_access_is_direct(mr, is_write)) {
2903 l = memory_access_size(mr, l, addr);
2904 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2905 return false;
2909 len -= l;
2910 addr += l;
2912 rcu_read_unlock();
2913 return true;
2916 /* Map a physical memory region into a host virtual address.
2917 * May map a subset of the requested range, given by and returned in *plen.
2918 * May return NULL if resources needed to perform the mapping are exhausted.
2919 * Use only for reads OR writes - not for read-modify-write operations.
2920 * Use cpu_register_map_client() to know when retrying the map operation is
2921 * likely to succeed.
2923 void *address_space_map(AddressSpace *as,
2924 hwaddr addr,
2925 hwaddr *plen,
2926 bool is_write)
2928 hwaddr len = *plen;
2929 hwaddr done = 0;
2930 hwaddr l, xlat, base;
2931 MemoryRegion *mr, *this_mr;
2932 void *ptr;
2934 if (len == 0) {
2935 return NULL;
2938 l = len;
2939 rcu_read_lock();
2940 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2942 if (!memory_access_is_direct(mr, is_write)) {
2943 if (atomic_xchg(&bounce.in_use, true)) {
2944 rcu_read_unlock();
2945 return NULL;
2947 /* Avoid unbounded allocations */
2948 l = MIN(l, TARGET_PAGE_SIZE);
2949 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2950 bounce.addr = addr;
2951 bounce.len = l;
2953 memory_region_ref(mr);
2954 bounce.mr = mr;
2955 if (!is_write) {
2956 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2957 bounce.buffer, l);
2960 rcu_read_unlock();
2961 *plen = l;
2962 return bounce.buffer;
2965 base = xlat;
2967 for (;;) {
2968 len -= l;
2969 addr += l;
2970 done += l;
2971 if (len == 0) {
2972 break;
2975 l = len;
2976 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2977 if (this_mr != mr || xlat != base + done) {
2978 break;
2982 memory_region_ref(mr);
2983 *plen = done;
2984 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2985 rcu_read_unlock();
2987 return ptr;
2990 /* Unmaps a memory region previously mapped by address_space_map().
2991 * Will also mark the memory as dirty if is_write == 1. access_len gives
2992 * the amount of memory that was actually read or written by the caller.
2994 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2995 int is_write, hwaddr access_len)
2997 if (buffer != bounce.buffer) {
2998 MemoryRegion *mr;
2999 ram_addr_t addr1;
3001 mr = memory_region_from_host(buffer, &addr1);
3002 assert(mr != NULL);
3003 if (is_write) {
3004 invalidate_and_set_dirty(mr, addr1, access_len);
3006 if (xen_enabled()) {
3007 xen_invalidate_map_cache_entry(buffer);
3009 memory_region_unref(mr);
3010 return;
3012 if (is_write) {
3013 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3014 bounce.buffer, access_len);
3016 qemu_vfree(bounce.buffer);
3017 bounce.buffer = NULL;
3018 memory_region_unref(bounce.mr);
3019 atomic_mb_set(&bounce.in_use, false);
3020 cpu_notify_map_clients();
3023 void *cpu_physical_memory_map(hwaddr addr,
3024 hwaddr *plen,
3025 int is_write)
3027 return address_space_map(&address_space_memory, addr, plen, is_write);
3030 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3031 int is_write, hwaddr access_len)
3033 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3036 /* warning: addr must be aligned */
3037 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3038 MemTxAttrs attrs,
3039 MemTxResult *result,
3040 enum device_endian endian)
3042 uint8_t *ptr;
3043 uint64_t val;
3044 MemoryRegion *mr;
3045 hwaddr l = 4;
3046 hwaddr addr1;
3047 MemTxResult r;
3048 bool release_lock = false;
3050 rcu_read_lock();
3051 mr = address_space_translate(as, addr, &addr1, &l, false);
3052 if (l < 4 || !memory_access_is_direct(mr, false)) {
3053 release_lock |= prepare_mmio_access(mr);
3055 /* I/O case */
3056 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3057 #if defined(TARGET_WORDS_BIGENDIAN)
3058 if (endian == DEVICE_LITTLE_ENDIAN) {
3059 val = bswap32(val);
3061 #else
3062 if (endian == DEVICE_BIG_ENDIAN) {
3063 val = bswap32(val);
3065 #endif
3066 } else {
3067 /* RAM case */
3068 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3069 switch (endian) {
3070 case DEVICE_LITTLE_ENDIAN:
3071 val = ldl_le_p(ptr);
3072 break;
3073 case DEVICE_BIG_ENDIAN:
3074 val = ldl_be_p(ptr);
3075 break;
3076 default:
3077 val = ldl_p(ptr);
3078 break;
3080 r = MEMTX_OK;
3082 if (result) {
3083 *result = r;
3085 if (release_lock) {
3086 qemu_mutex_unlock_iothread();
3088 rcu_read_unlock();
3089 return val;
3092 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3093 MemTxAttrs attrs, MemTxResult *result)
3095 return address_space_ldl_internal(as, addr, attrs, result,
3096 DEVICE_NATIVE_ENDIAN);
3099 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3100 MemTxAttrs attrs, MemTxResult *result)
3102 return address_space_ldl_internal(as, addr, attrs, result,
3103 DEVICE_LITTLE_ENDIAN);
3106 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3107 MemTxAttrs attrs, MemTxResult *result)
3109 return address_space_ldl_internal(as, addr, attrs, result,
3110 DEVICE_BIG_ENDIAN);
3113 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3115 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3118 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3120 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3123 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3125 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3128 /* warning: addr must be aligned */
3129 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3130 MemTxAttrs attrs,
3131 MemTxResult *result,
3132 enum device_endian endian)
3134 uint8_t *ptr;
3135 uint64_t val;
3136 MemoryRegion *mr;
3137 hwaddr l = 8;
3138 hwaddr addr1;
3139 MemTxResult r;
3140 bool release_lock = false;
3142 rcu_read_lock();
3143 mr = address_space_translate(as, addr, &addr1, &l,
3144 false);
3145 if (l < 8 || !memory_access_is_direct(mr, false)) {
3146 release_lock |= prepare_mmio_access(mr);
3148 /* I/O case */
3149 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3150 #if defined(TARGET_WORDS_BIGENDIAN)
3151 if (endian == DEVICE_LITTLE_ENDIAN) {
3152 val = bswap64(val);
3154 #else
3155 if (endian == DEVICE_BIG_ENDIAN) {
3156 val = bswap64(val);
3158 #endif
3159 } else {
3160 /* RAM case */
3161 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3162 switch (endian) {
3163 case DEVICE_LITTLE_ENDIAN:
3164 val = ldq_le_p(ptr);
3165 break;
3166 case DEVICE_BIG_ENDIAN:
3167 val = ldq_be_p(ptr);
3168 break;
3169 default:
3170 val = ldq_p(ptr);
3171 break;
3173 r = MEMTX_OK;
3175 if (result) {
3176 *result = r;
3178 if (release_lock) {
3179 qemu_mutex_unlock_iothread();
3181 rcu_read_unlock();
3182 return val;
3185 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3186 MemTxAttrs attrs, MemTxResult *result)
3188 return address_space_ldq_internal(as, addr, attrs, result,
3189 DEVICE_NATIVE_ENDIAN);
3192 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3193 MemTxAttrs attrs, MemTxResult *result)
3195 return address_space_ldq_internal(as, addr, attrs, result,
3196 DEVICE_LITTLE_ENDIAN);
3199 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3200 MemTxAttrs attrs, MemTxResult *result)
3202 return address_space_ldq_internal(as, addr, attrs, result,
3203 DEVICE_BIG_ENDIAN);
3206 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3208 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3211 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3213 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3216 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3218 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3221 /* XXX: optimize */
3222 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3223 MemTxAttrs attrs, MemTxResult *result)
3225 uint8_t val;
3226 MemTxResult r;
3228 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3229 if (result) {
3230 *result = r;
3232 return val;
3235 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3237 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3240 /* warning: addr must be aligned */
3241 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3242 hwaddr addr,
3243 MemTxAttrs attrs,
3244 MemTxResult *result,
3245 enum device_endian endian)
3247 uint8_t *ptr;
3248 uint64_t val;
3249 MemoryRegion *mr;
3250 hwaddr l = 2;
3251 hwaddr addr1;
3252 MemTxResult r;
3253 bool release_lock = false;
3255 rcu_read_lock();
3256 mr = address_space_translate(as, addr, &addr1, &l,
3257 false);
3258 if (l < 2 || !memory_access_is_direct(mr, false)) {
3259 release_lock |= prepare_mmio_access(mr);
3261 /* I/O case */
3262 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3263 #if defined(TARGET_WORDS_BIGENDIAN)
3264 if (endian == DEVICE_LITTLE_ENDIAN) {
3265 val = bswap16(val);
3267 #else
3268 if (endian == DEVICE_BIG_ENDIAN) {
3269 val = bswap16(val);
3271 #endif
3272 } else {
3273 /* RAM case */
3274 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3275 switch (endian) {
3276 case DEVICE_LITTLE_ENDIAN:
3277 val = lduw_le_p(ptr);
3278 break;
3279 case DEVICE_BIG_ENDIAN:
3280 val = lduw_be_p(ptr);
3281 break;
3282 default:
3283 val = lduw_p(ptr);
3284 break;
3286 r = MEMTX_OK;
3288 if (result) {
3289 *result = r;
3291 if (release_lock) {
3292 qemu_mutex_unlock_iothread();
3294 rcu_read_unlock();
3295 return val;
3298 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3299 MemTxAttrs attrs, MemTxResult *result)
3301 return address_space_lduw_internal(as, addr, attrs, result,
3302 DEVICE_NATIVE_ENDIAN);
3305 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3306 MemTxAttrs attrs, MemTxResult *result)
3308 return address_space_lduw_internal(as, addr, attrs, result,
3309 DEVICE_LITTLE_ENDIAN);
3312 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3313 MemTxAttrs attrs, MemTxResult *result)
3315 return address_space_lduw_internal(as, addr, attrs, result,
3316 DEVICE_BIG_ENDIAN);
3319 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3321 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3324 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3326 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3329 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3331 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3334 /* warning: addr must be aligned. The ram page is not masked as dirty
3335 and the code inside is not invalidated. It is useful if the dirty
3336 bits are used to track modified PTEs */
3337 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3338 MemTxAttrs attrs, MemTxResult *result)
3340 uint8_t *ptr;
3341 MemoryRegion *mr;
3342 hwaddr l = 4;
3343 hwaddr addr1;
3344 MemTxResult r;
3345 uint8_t dirty_log_mask;
3346 bool release_lock = false;
3348 rcu_read_lock();
3349 mr = address_space_translate(as, addr, &addr1, &l,
3350 true);
3351 if (l < 4 || !memory_access_is_direct(mr, true)) {
3352 release_lock |= prepare_mmio_access(mr);
3354 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3355 } else {
3356 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3357 stl_p(ptr, val);
3359 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3360 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3361 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3362 4, dirty_log_mask);
3363 r = MEMTX_OK;
3365 if (result) {
3366 *result = r;
3368 if (release_lock) {
3369 qemu_mutex_unlock_iothread();
3371 rcu_read_unlock();
3374 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3376 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3379 /* warning: addr must be aligned */
3380 static inline void address_space_stl_internal(AddressSpace *as,
3381 hwaddr addr, uint32_t val,
3382 MemTxAttrs attrs,
3383 MemTxResult *result,
3384 enum device_endian endian)
3386 uint8_t *ptr;
3387 MemoryRegion *mr;
3388 hwaddr l = 4;
3389 hwaddr addr1;
3390 MemTxResult r;
3391 bool release_lock = false;
3393 rcu_read_lock();
3394 mr = address_space_translate(as, addr, &addr1, &l,
3395 true);
3396 if (l < 4 || !memory_access_is_direct(mr, true)) {
3397 release_lock |= prepare_mmio_access(mr);
3399 #if defined(TARGET_WORDS_BIGENDIAN)
3400 if (endian == DEVICE_LITTLE_ENDIAN) {
3401 val = bswap32(val);
3403 #else
3404 if (endian == DEVICE_BIG_ENDIAN) {
3405 val = bswap32(val);
3407 #endif
3408 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3409 } else {
3410 /* RAM case */
3411 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3412 switch (endian) {
3413 case DEVICE_LITTLE_ENDIAN:
3414 stl_le_p(ptr, val);
3415 break;
3416 case DEVICE_BIG_ENDIAN:
3417 stl_be_p(ptr, val);
3418 break;
3419 default:
3420 stl_p(ptr, val);
3421 break;
3423 invalidate_and_set_dirty(mr, addr1, 4);
3424 r = MEMTX_OK;
3426 if (result) {
3427 *result = r;
3429 if (release_lock) {
3430 qemu_mutex_unlock_iothread();
3432 rcu_read_unlock();
3435 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3436 MemTxAttrs attrs, MemTxResult *result)
3438 address_space_stl_internal(as, addr, val, attrs, result,
3439 DEVICE_NATIVE_ENDIAN);
3442 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3443 MemTxAttrs attrs, MemTxResult *result)
3445 address_space_stl_internal(as, addr, val, attrs, result,
3446 DEVICE_LITTLE_ENDIAN);
3449 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3450 MemTxAttrs attrs, MemTxResult *result)
3452 address_space_stl_internal(as, addr, val, attrs, result,
3453 DEVICE_BIG_ENDIAN);
3456 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3458 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3461 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3463 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3466 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3468 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3471 /* XXX: optimize */
3472 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3473 MemTxAttrs attrs, MemTxResult *result)
3475 uint8_t v = val;
3476 MemTxResult r;
3478 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3479 if (result) {
3480 *result = r;
3484 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3486 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3489 /* warning: addr must be aligned */
3490 static inline void address_space_stw_internal(AddressSpace *as,
3491 hwaddr addr, uint32_t val,
3492 MemTxAttrs attrs,
3493 MemTxResult *result,
3494 enum device_endian endian)
3496 uint8_t *ptr;
3497 MemoryRegion *mr;
3498 hwaddr l = 2;
3499 hwaddr addr1;
3500 MemTxResult r;
3501 bool release_lock = false;
3503 rcu_read_lock();
3504 mr = address_space_translate(as, addr, &addr1, &l, true);
3505 if (l < 2 || !memory_access_is_direct(mr, true)) {
3506 release_lock |= prepare_mmio_access(mr);
3508 #if defined(TARGET_WORDS_BIGENDIAN)
3509 if (endian == DEVICE_LITTLE_ENDIAN) {
3510 val = bswap16(val);
3512 #else
3513 if (endian == DEVICE_BIG_ENDIAN) {
3514 val = bswap16(val);
3516 #endif
3517 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3518 } else {
3519 /* RAM case */
3520 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3521 switch (endian) {
3522 case DEVICE_LITTLE_ENDIAN:
3523 stw_le_p(ptr, val);
3524 break;
3525 case DEVICE_BIG_ENDIAN:
3526 stw_be_p(ptr, val);
3527 break;
3528 default:
3529 stw_p(ptr, val);
3530 break;
3532 invalidate_and_set_dirty(mr, addr1, 2);
3533 r = MEMTX_OK;
3535 if (result) {
3536 *result = r;
3538 if (release_lock) {
3539 qemu_mutex_unlock_iothread();
3541 rcu_read_unlock();
3544 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3545 MemTxAttrs attrs, MemTxResult *result)
3547 address_space_stw_internal(as, addr, val, attrs, result,
3548 DEVICE_NATIVE_ENDIAN);
3551 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3552 MemTxAttrs attrs, MemTxResult *result)
3554 address_space_stw_internal(as, addr, val, attrs, result,
3555 DEVICE_LITTLE_ENDIAN);
3558 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3559 MemTxAttrs attrs, MemTxResult *result)
3561 address_space_stw_internal(as, addr, val, attrs, result,
3562 DEVICE_BIG_ENDIAN);
3565 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3567 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3570 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3572 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3575 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3577 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3580 /* XXX: optimize */
3581 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3582 MemTxAttrs attrs, MemTxResult *result)
3584 MemTxResult r;
3585 val = tswap64(val);
3586 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3587 if (result) {
3588 *result = r;
3592 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3593 MemTxAttrs attrs, MemTxResult *result)
3595 MemTxResult r;
3596 val = cpu_to_le64(val);
3597 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3598 if (result) {
3599 *result = r;
3602 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3603 MemTxAttrs attrs, MemTxResult *result)
3605 MemTxResult r;
3606 val = cpu_to_be64(val);
3607 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3608 if (result) {
3609 *result = r;
3613 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3615 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3618 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3620 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3623 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3625 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3628 /* virtual memory access for debug (includes writing to ROM) */
3629 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3630 uint8_t *buf, int len, int is_write)
3632 int l;
3633 hwaddr phys_addr;
3634 target_ulong page;
3636 while (len > 0) {
3637 int asidx;
3638 MemTxAttrs attrs;
3640 page = addr & TARGET_PAGE_MASK;
3641 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3642 asidx = cpu_asidx_from_attrs(cpu, attrs);
3643 /* if no physical page mapped, return an error */
3644 if (phys_addr == -1)
3645 return -1;
3646 l = (page + TARGET_PAGE_SIZE) - addr;
3647 if (l > len)
3648 l = len;
3649 phys_addr += (addr & ~TARGET_PAGE_MASK);
3650 if (is_write) {
3651 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3652 phys_addr, buf, l);
3653 } else {
3654 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3655 MEMTXATTRS_UNSPECIFIED,
3656 buf, l, 0);
3658 len -= l;
3659 buf += l;
3660 addr += l;
3662 return 0;
3666 * Allows code that needs to deal with migration bitmaps etc to still be built
3667 * target independent.
3669 size_t qemu_target_page_bits(void)
3671 return TARGET_PAGE_BITS;
3674 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3690 #ifndef CONFIG_USER_ONLY
3691 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3693 MemoryRegion*mr;
3694 hwaddr l = 1;
3695 bool res;
3697 rcu_read_lock();
3698 mr = address_space_translate(&address_space_memory,
3699 phys_addr, &phys_addr, &l, false);
3701 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3702 rcu_read_unlock();
3703 return res;
3706 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3708 RAMBlock *block;
3709 int ret = 0;
3711 rcu_read_lock();
3712 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3713 ret = func(block->idstr, block->host, block->offset,
3714 block->used_length, opaque);
3715 if (ret) {
3716 break;
3719 rcu_read_unlock();
3720 return ret;
3722 #endif