vl: Simplify global property registration
[qemu/kevin.git] / exec.c
blob2e363f06a6bf041b94bee0a392a3f85f39a78d23
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #include <sys/mman.h>
23 #endif
25 #include "qemu/cutils.h"
26 #include "cpu.h"
27 #include "exec/exec-all.h"
28 #include "tcg.h"
29 #include "hw/qdev-core.h"
30 #if !defined(CONFIG_USER_ONLY)
31 #include "hw/boards.h"
32 #include "hw/xen/xen.h"
33 #endif
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #else /* !CONFIG_USER_ONLY */
42 #include "hw/hw.h"
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
48 #include "trace.h"
49 #endif
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
58 #include "exec/log.h"
60 #include "qemu/range.h"
61 #ifndef _WIN32
62 #include "qemu/mmap-alloc.h"
63 #endif
65 //#define DEBUG_SUBPAGE
67 #if !defined(CONFIG_USER_ONLY)
68 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
69 * are protected by the ramlist lock.
71 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
73 static MemoryRegion *system_memory;
74 static MemoryRegion *system_io;
76 AddressSpace address_space_io;
77 AddressSpace address_space_memory;
79 MemoryRegion io_mem_rom, io_mem_notdirty;
80 static MemoryRegion io_mem_unassigned;
82 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
83 #define RAM_PREALLOC (1 << 0)
85 /* RAM is mmap-ed with MAP_SHARED */
86 #define RAM_SHARED (1 << 1)
88 /* Only a portion of RAM (used_length) is actually used, and migrated.
89 * This used_length size can change across reboots.
91 #define RAM_RESIZEABLE (1 << 2)
93 #endif
95 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
96 /* current CPU in the current thread. It is only valid inside
97 cpu_exec() */
98 __thread CPUState *current_cpu;
99 /* 0 = Do not count executed instructions.
100 1 = Precise instruction counting.
101 2 = Adaptive rate instruction counting. */
102 int use_icount;
104 #if !defined(CONFIG_USER_ONLY)
/* One entry of the multi-level physical page map. */
typedef struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
} PhysPageEntry;
115 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
117 /* Size of the L2 (and L3, etc) page tables. */
118 #define ADDR_SPACE_BITS 64
120 #define P_L2_BITS 9
121 #define P_L2_SIZE (1 << P_L2_BITS)
123 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
125 typedef PhysPageEntry Node[P_L2_SIZE];
127 typedef struct PhysPageMap {
128 struct rcu_head rcu;
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
132 unsigned nodes_nb;
133 unsigned nodes_nb_alloc;
134 Node *nodes;
135 MemoryRegionSection *sections;
136 } PhysPageMap;
/* Per-AddressSpace lookup state: the page map plus a one-entry section cache. */
struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* Most recently used section; read and written with atomic accessors. */
    MemoryRegionSection *mru_section;
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};
150 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
151 typedef struct subpage_t {
152 MemoryRegion iomem;
153 AddressSpace *as;
154 hwaddr base;
155 uint16_t sub_section[TARGET_PAGE_SIZE];
156 } subpage_t;
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
167 static MemoryRegion io_mem_watch;
/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};
183 #endif
185 #if !defined(CONFIG_USER_ONLY)
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
198 unsigned i;
199 uint32_t ret;
200 PhysPageEntry e;
201 PhysPageEntry *p;
203 ret = map->nodes_nb++;
204 p = map->nodes[ret];
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
213 return ret;
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
218 int level)
220 PhysPageEntry *p;
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
231 lp->skip = 0;
232 lp->ptr = leaf;
233 *index += step;
234 *nb -= step;
235 } else {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
238 ++lp;
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
244 uint16_t leaf)
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
252 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
253 * and update our entry so we can skip it and go directly to the destination.
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
258 int valid = 0;
259 PhysPageEntry *p;
260 int i;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
263 return;
266 p = nodes[lp->ptr];
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
269 continue;
272 valid_ptr = i;
273 valid++;
274 if (p[i].skip) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
280 if (valid != 1) {
281 return;
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
288 return;
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
294 /* By design, we should have made this node a leaf to begin with so we
295 * should never reach here.
296 * But since it's so simple to handle this, let's do it just in case we
297 * change this rule.
299 lp->skip = 0;
300 } else {
301 lp->skip += p[valid_ptr].skip;
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
314 static inline bool section_covers_addr(const MemoryRegionSection *section,
315 hwaddr addr)
317 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
318 * the section must cover the entire address space.
320 return section->size.hi ||
321 range_covers_byte(section->offset_within_address_space,
322 section->size.lo, addr);
325 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
326 Node *nodes, MemoryRegionSection *sections)
328 PhysPageEntry *p;
329 hwaddr index = addr >> TARGET_PAGE_BITS;
330 int i;
332 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
333 if (lp.ptr == PHYS_MAP_NODE_NIL) {
334 return &sections[PHYS_SECTION_UNASSIGNED];
336 p = nodes[lp.ptr];
337 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
340 if (section_covers_addr(&sections[lp.ptr], addr)) {
341 return &sections[lp.ptr];
342 } else {
343 return &sections[PHYS_SECTION_UNASSIGNED];
347 bool memory_region_is_unassigned(MemoryRegion *mr)
349 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
350 && mr != &io_mem_watch;
353 /* Called from RCU critical section */
354 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
355 hwaddr addr,
356 bool resolve_subpage)
358 MemoryRegionSection *section = atomic_read(&d->mru_section);
359 subpage_t *subpage;
360 bool update;
362 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
363 section_covers_addr(section, addr)) {
364 update = false;
365 } else {
366 section = phys_page_find(d->phys_map, addr, d->map.nodes,
367 d->map.sections);
368 update = true;
370 if (resolve_subpage && section->mr->subpage) {
371 subpage = container_of(section->mr, subpage_t, iomem);
372 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
374 if (update) {
375 atomic_set(&d->mru_section, section);
377 return section;
380 /* Called from RCU critical section */
381 static MemoryRegionSection *
382 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
383 hwaddr *plen, bool resolve_subpage)
385 MemoryRegionSection *section;
386 MemoryRegion *mr;
387 Int128 diff;
389 section = address_space_lookup_region(d, addr, resolve_subpage);
390 /* Compute offset within MemoryRegionSection */
391 addr -= section->offset_within_address_space;
393 /* Compute offset within MemoryRegion */
394 *xlat = addr + section->offset_within_region;
396 mr = section->mr;
398 /* MMIO registers can be expected to perform full-width accesses based only
399 * on their address, without considering adjacent registers that could
400 * decode to completely different MemoryRegions. When such registers
401 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
402 * regions overlap wildly. For this reason we cannot clamp the accesses
403 * here.
405 * If the length is small (as is the case for address_space_ldl/stl),
406 * everything works fine. If the incoming length is large, however,
407 * the caller really has to do the clamping through memory_access_size.
409 if (memory_region_is_ram(mr)) {
410 diff = int128_sub(section->size, int128_make64(addr));
411 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
413 return section;
416 /* Called from RCU critical section */
417 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
418 hwaddr *xlat, hwaddr *plen,
419 bool is_write)
421 IOMMUTLBEntry iotlb;
422 MemoryRegionSection *section;
423 MemoryRegion *mr;
425 for (;;) {
426 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
427 section = address_space_translate_internal(d, addr, &addr, plen, true);
428 mr = section->mr;
430 if (!mr->iommu_ops) {
431 break;
434 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
435 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
436 | (addr & iotlb.addr_mask));
437 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
438 if (!(iotlb.perm & (1 << is_write))) {
439 mr = &io_mem_unassigned;
440 break;
443 as = iotlb.target_as;
446 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
447 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
448 *plen = MIN(page, *plen);
451 *xlat = addr;
452 return mr;
455 /* Called from RCU critical section */
456 MemoryRegionSection *
457 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
458 hwaddr *xlat, hwaddr *plen)
460 MemoryRegionSection *section;
461 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
463 section = address_space_translate_internal(d, addr, xlat, plen, false);
465 assert(!section->mr->iommu_ops);
466 return section;
468 #endif
470 #if !defined(CONFIG_USER_ONLY)
472 static int cpu_common_post_load(void *opaque, int version_id)
474 CPUState *cpu = opaque;
476 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
477 version_id is increased. */
478 cpu->interrupt_request &= ~0x01;
479 tlb_flush(cpu, 1);
481 return 0;
484 static int cpu_common_pre_load(void *opaque)
486 CPUState *cpu = opaque;
488 cpu->exception_index = -1;
490 return 0;
493 static bool cpu_common_exception_index_needed(void *opaque)
495 CPUState *cpu = opaque;
497 return tcg_enabled() && cpu->exception_index != -1;
/* Optional subsection carrying CPUState.exception_index; included only when
 * cpu_common_exception_index_needed() returns true.
 */
static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
511 static bool cpu_common_crash_occurred_needed(void *opaque)
513 CPUState *cpu = opaque;
515 return cpu->crash_occurred;
/* Optional subsection carrying CPUState.crash_occurred; included only when
 * cpu_common_crash_occurred_needed() returns true.
 */
static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
/* Migration description of the target-independent CPUState fields (halted
 * and interrupt_request), plus the two optional subsections above.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};
547 #endif
549 CPUState *qemu_get_cpu(int index)
551 CPUState *cpu;
553 CPU_FOREACH(cpu) {
554 if (cpu->cpu_index == index) {
555 return cpu;
559 return NULL;
562 #if !defined(CONFIG_USER_ONLY)
563 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
565 CPUAddressSpace *newas;
567 /* Target code should have set num_ases before calling us */
568 assert(asidx < cpu->num_ases);
570 if (asidx == 0) {
571 /* address space 0 gets the convenience alias */
572 cpu->as = as;
575 /* KVM cannot currently support multiple address spaces. */
576 assert(asidx == 0 || !kvm_enabled());
578 if (!cpu->cpu_ases) {
579 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
582 newas = &cpu->cpu_ases[asidx];
583 newas->cpu = cpu;
584 newas->as = as;
585 if (tcg_enabled()) {
586 newas->tcg_as_listener.commit = tcg_commit;
587 memory_listener_register(&newas->tcg_as_listener, as);
591 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
593 /* Return the AddressSpace corresponding to the specified index */
594 return cpu->cpu_ases[asidx].as;
596 #endif
598 #ifndef CONFIG_USER_ONLY
599 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
601 static int cpu_get_free_index(Error **errp)
603 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
605 if (cpu >= MAX_CPUMASK_BITS) {
606 error_setg(errp, "Trying to use more CPUs than max of %d",
607 MAX_CPUMASK_BITS);
608 return -1;
611 bitmap_set(cpu_index_map, cpu, 1);
612 return cpu;
615 void cpu_exec_exit(CPUState *cpu)
617 if (cpu->cpu_index == -1) {
618 /* cpu_index was never allocated by this @cpu or was already freed. */
619 return;
622 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
623 cpu->cpu_index = -1;
625 #else
627 static int cpu_get_free_index(Error **errp)
629 CPUState *some_cpu;
630 int cpu_index = 0;
632 CPU_FOREACH(some_cpu) {
633 cpu_index++;
635 return cpu_index;
/* User-mode variant: indexes are not tracked in a bitmap here, so there is
 * nothing to release.
 */
void cpu_exec_exit(CPUState *cpu)
{
}
641 #endif
643 void cpu_exec_init(CPUState *cpu, Error **errp)
645 CPUClass *cc = CPU_GET_CLASS(cpu);
646 Error *local_err = NULL;
648 cpu->as = NULL;
649 cpu->num_ases = 0;
651 #ifndef CONFIG_USER_ONLY
652 cpu->thread_id = qemu_get_thread_id();
654 /* This is a softmmu CPU object, so create a property for it
655 * so users can wire up its memory. (This can't go in qom/cpu.c
656 * because that file is compiled only once for both user-mode
657 * and system builds.) The default if no link is set up is to use
658 * the system address space.
660 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
661 (Object **)&cpu->memory,
662 qdev_prop_allow_set_link_before_realize,
663 OBJ_PROP_LINK_UNREF_ON_RELEASE,
664 &error_abort);
665 cpu->memory = system_memory;
666 object_ref(OBJECT(cpu->memory));
667 #endif
669 #if defined(CONFIG_USER_ONLY)
670 cpu_list_lock();
671 #endif
672 cpu->cpu_index = cpu_get_free_index(&local_err);
673 if (local_err) {
674 error_propagate(errp, local_err);
675 #if defined(CONFIG_USER_ONLY)
676 cpu_list_unlock();
677 #endif
678 return;
680 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
681 #if defined(CONFIG_USER_ONLY)
682 (void) cc;
683 cpu_list_unlock();
684 #else
685 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
686 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
688 if (cc->vmsd != NULL) {
689 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
691 #endif
694 #if defined(CONFIG_USER_ONLY)
/* User-mode variant: invalidate the translated code covering the single
 * byte at @pc. @cpu is not used.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
699 #else
700 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
702 MemTxAttrs attrs;
703 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
704 int asidx = cpu_asidx_from_attrs(cpu, attrs);
705 if (phys != -1) {
706 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
707 phys | (pc & ~TARGET_PAGE_MASK));
710 #endif
712 #if defined(CONFIG_USER_ONLY)
/* User-mode stubs for the watchpoint API: the removal helpers do nothing
 * and the insert/remove entry points report -ENOSYS.
 */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
733 #else
734 /* Add a watchpoint. */
735 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
736 int flags, CPUWatchpoint **watchpoint)
738 CPUWatchpoint *wp;
740 /* forbid ranges which are empty or run off the end of the address space */
741 if (len == 0 || (addr + len - 1) < addr) {
742 error_report("tried to set invalid watchpoint at %"
743 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
744 return -EINVAL;
746 wp = g_malloc(sizeof(*wp));
748 wp->vaddr = addr;
749 wp->len = len;
750 wp->flags = flags;
752 /* keep all GDB-injected watchpoints in front */
753 if (flags & BP_GDB) {
754 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
755 } else {
756 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
759 tlb_flush_page(cpu, addr);
761 if (watchpoint)
762 *watchpoint = wp;
763 return 0;
766 /* Remove a specific watchpoint. */
767 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
768 int flags)
770 CPUWatchpoint *wp;
772 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
773 if (addr == wp->vaddr && len == wp->len
774 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
775 cpu_watchpoint_remove_by_ref(cpu, wp);
776 return 0;
779 return -ENOENT;
782 /* Remove a specific watchpoint by reference. */
783 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
785 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
787 tlb_flush_page(cpu, watchpoint->vaddr);
789 g_free(watchpoint);
792 /* Remove all matching watchpoints. */
793 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
795 CPUWatchpoint *wp, *next;
797 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
798 if (wp->flags & mask) {
799 cpu_watchpoint_remove_by_ref(cpu, wp);
804 /* Return true if this watchpoint address matches the specified
805 * access (ie the address range covered by the watchpoint overlaps
806 * partially or completely with the address range covered by the
807 * access).
809 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
810 vaddr addr,
811 vaddr len)
813 /* We know the lengths are non-zero, but a little caution is
814 * required to avoid errors in the case where the range ends
815 * exactly at the top of the address space and so addr + len
816 * wraps round to zero.
818 vaddr wpend = wp->vaddr + wp->len - 1;
819 vaddr addrend = addr + len - 1;
821 return !(addr > wpend || wp->vaddr > addrend);
824 #endif
826 /* Add a breakpoint. */
827 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
828 CPUBreakpoint **breakpoint)
830 CPUBreakpoint *bp;
832 bp = g_malloc(sizeof(*bp));
834 bp->pc = pc;
835 bp->flags = flags;
837 /* keep all GDB-injected breakpoints in front */
838 if (flags & BP_GDB) {
839 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
840 } else {
841 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
844 breakpoint_invalidate(cpu, pc);
846 if (breakpoint) {
847 *breakpoint = bp;
849 return 0;
852 /* Remove a specific breakpoint. */
853 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
855 CPUBreakpoint *bp;
857 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
858 if (bp->pc == pc && bp->flags == flags) {
859 cpu_breakpoint_remove_by_ref(cpu, bp);
860 return 0;
863 return -ENOENT;
866 /* Remove a specific breakpoint by reference. */
867 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
869 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
871 breakpoint_invalidate(cpu, breakpoint->pc);
873 g_free(breakpoint);
876 /* Remove all matching breakpoints. */
877 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
879 CPUBreakpoint *bp, *next;
881 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
882 if (bp->flags & mask) {
883 cpu_breakpoint_remove_by_ref(cpu, bp);
888 /* enable or disable single step mode. EXCP_DEBUG is returned by the
889 CPU loop after each instruction */
890 void cpu_single_step(CPUState *cpu, int enabled)
892 if (cpu->singlestep_enabled != enabled) {
893 cpu->singlestep_enabled = enabled;
894 if (kvm_enabled()) {
895 kvm_update_guest_debug(cpu, 0);
896 } else {
897 /* must flush all the translated code to avoid inconsistencies */
898 /* XXX: only flush what is necessary */
899 tb_flush(cpu);
904 void cpu_abort(CPUState *cpu, const char *fmt, ...)
906 va_list ap;
907 va_list ap2;
909 va_start(ap, fmt);
910 va_copy(ap2, ap);
911 fprintf(stderr, "qemu: fatal: ");
912 vfprintf(stderr, fmt, ap);
913 fprintf(stderr, "\n");
914 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
915 if (qemu_log_separate()) {
916 qemu_log("qemu: fatal: ");
917 qemu_log_vprintf(fmt, ap2);
918 qemu_log("\n");
919 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
920 qemu_log_flush();
921 qemu_log_close();
923 va_end(ap2);
924 va_end(ap);
925 replay_finish();
926 #if defined(CONFIG_USER_ONLY)
928 struct sigaction act;
929 sigfillset(&act.sa_mask);
930 act.sa_handler = SIG_DFL;
931 sigaction(SIGABRT, &act, NULL);
933 #endif
934 abort();
937 #if !defined(CONFIG_USER_ONLY)
938 /* Called from RCU critical section */
939 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
941 RAMBlock *block;
943 block = atomic_rcu_read(&ram_list.mru_block);
944 if (block && addr - block->offset < block->max_length) {
945 return block;
947 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
948 if (addr - block->offset < block->max_length) {
949 goto found;
953 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
954 abort();
956 found:
957 /* It is safe to write mru_block outside the iothread lock. This
958 * is what happens:
960 * mru_block = xxx
961 * rcu_read_unlock()
962 * xxx removed from list
963 * rcu_read_lock()
964 * read mru_block
965 * mru_block = NULL;
966 * call_rcu(reclaim_ramblock, xxx);
967 * rcu_read_unlock()
969 * atomic_rcu_set is not needed here. The block was already published
970 * when it was placed into the list. Here we're just making an extra
971 * copy of the pointer.
973 ram_list.mru_block = block;
974 return block;
977 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
979 CPUState *cpu;
980 ram_addr_t start1;
981 RAMBlock *block;
982 ram_addr_t end;
984 end = TARGET_PAGE_ALIGN(start + length);
985 start &= TARGET_PAGE_MASK;
987 rcu_read_lock();
988 block = qemu_get_ram_block(start);
989 assert(block == qemu_get_ram_block(end - 1));
990 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
991 CPU_FOREACH(cpu) {
992 tlb_reset_dirty(cpu, start1, length);
994 rcu_read_unlock();
997 /* Note: start and end must be within the same ram block. */
998 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
999 ram_addr_t length,
1000 unsigned client)
1002 DirtyMemoryBlocks *blocks;
1003 unsigned long end, page;
1004 bool dirty = false;
1006 if (length == 0) {
1007 return false;
1010 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1011 page = start >> TARGET_PAGE_BITS;
1013 rcu_read_lock();
1015 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1017 while (page < end) {
1018 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1019 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1020 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1022 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1023 offset, num);
1024 page += num;
1027 rcu_read_unlock();
1029 if (dirty && tcg_enabled()) {
1030 tlb_reset_dirty_range_all(start, length);
1033 return dirty;
1036 /* Called from RCU critical section */
1037 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1038 MemoryRegionSection *section,
1039 target_ulong vaddr,
1040 hwaddr paddr, hwaddr xlat,
1041 int prot,
1042 target_ulong *address)
1044 hwaddr iotlb;
1045 CPUWatchpoint *wp;
1047 if (memory_region_is_ram(section->mr)) {
1048 /* Normal RAM. */
1049 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1050 + xlat;
1051 if (!section->readonly) {
1052 iotlb |= PHYS_SECTION_NOTDIRTY;
1053 } else {
1054 iotlb |= PHYS_SECTION_ROM;
1056 } else {
1057 AddressSpaceDispatch *d;
1059 d = atomic_rcu_read(&section->address_space->dispatch);
1060 iotlb = section - d->map.sections;
1061 iotlb += xlat;
1064 /* Make accesses to pages with watchpoints go via the
1065 watchpoint trap routines. */
1066 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1067 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1068 /* Avoid trapping reads of pages with a write breakpoint. */
1069 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1070 iotlb = PHYS_SECTION_WATCH + paddr;
1071 *address |= TLB_MMIO;
1072 break;
1077 return iotlb;
1079 #endif /* !defined(CONFIG_USER_ONLY) */
1081 #if !defined(CONFIG_USER_ONLY)
1083 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1084 uint16_t section);
1085 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1087 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1088 qemu_anon_ram_alloc;
1091 * Set a custom physical guest memory allocator.
1092 * Accelerators with unusual needs may need this. Hopefully, we can
1093 * get rid of it eventually.
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    /* Replaces the file-level hook, which defaults to qemu_anon_ram_alloc. */
    phys_mem_alloc = alloc;
}
1100 static uint16_t phys_section_add(PhysPageMap *map,
1101 MemoryRegionSection *section)
1103 /* The physical section number is ORed with a page-aligned
1104 * pointer to produce the iotlb entries. Thus it should
1105 * never overflow into the page-aligned value.
1107 assert(map->sections_nb < TARGET_PAGE_SIZE);
1109 if (map->sections_nb == map->sections_nb_alloc) {
1110 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1111 map->sections = g_renew(MemoryRegionSection, map->sections,
1112 map->sections_nb_alloc);
1114 map->sections[map->sections_nb] = *section;
1115 memory_region_ref(section->mr);
1116 return map->sections_nb++;
1119 static void phys_section_destroy(MemoryRegion *mr)
1121 bool have_sub_page = mr->subpage;
1123 memory_region_unref(mr);
1125 if (have_sub_page) {
1126 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1127 object_unref(OBJECT(&subpage->iomem));
1128 g_free(subpage);
1132 static void phys_sections_free(PhysPageMap *map)
1134 while (map->sections_nb > 0) {
1135 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1136 phys_section_destroy(section->mr);
1138 g_free(map->sections);
1139 g_free(map->nodes);
1142 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1144 subpage_t *subpage;
1145 hwaddr base = section->offset_within_address_space
1146 & TARGET_PAGE_MASK;
1147 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1148 d->map.nodes, d->map.sections);
1149 MemoryRegionSection subsection = {
1150 .offset_within_address_space = base,
1151 .size = int128_make64(TARGET_PAGE_SIZE),
1153 hwaddr start, end;
1155 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1157 if (!(existing->mr->subpage)) {
1158 subpage = subpage_init(d->as, base);
1159 subsection.address_space = d->as;
1160 subsection.mr = &subpage->iomem;
1161 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1162 phys_section_add(&d->map, &subsection));
1163 } else {
1164 subpage = container_of(existing->mr, subpage_t, iomem);
1166 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1167 end = start + int128_get64(section->size) - 1;
1168 subpage_register(subpage, start, end,
1169 phys_section_add(&d->map, section));
1173 static void register_multipage(AddressSpaceDispatch *d,
1174 MemoryRegionSection *section)
1176 hwaddr start_addr = section->offset_within_address_space;
1177 uint16_t section_index = phys_section_add(&d->map, section);
1178 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1179 TARGET_PAGE_BITS));
1181 assert(num_pages);
1182 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1185 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1187 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1188 AddressSpaceDispatch *d = as->next_dispatch;
1189 MemoryRegionSection now = *section, remain = *section;
1190 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1192 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1193 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1194 - now.offset_within_address_space;
1196 now.size = int128_min(int128_make64(left), now.size);
1197 register_subpage(d, &now);
1198 } else {
1199 now.size = int128_zero();
1201 while (int128_ne(remain.size, now.size)) {
1202 remain.size = int128_sub(remain.size, now.size);
1203 remain.offset_within_address_space += int128_get64(now.size);
1204 remain.offset_within_region += int128_get64(now.size);
1205 now = remain;
1206 if (int128_lt(remain.size, page_size)) {
1207 register_subpage(d, &now);
1208 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1209 now.size = page_size;
1210 register_subpage(d, &now);
1211 } else {
1212 now.size = int128_and(now.size, int128_neg(page_size));
1213 register_multipage(d, &now);
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1224 void qemu_mutex_lock_ramlist(void)
1226 qemu_mutex_lock(&ram_list.mutex);
1229 void qemu_mutex_unlock_ramlist(void)
1231 qemu_mutex_unlock(&ram_list.mutex);
1234 #ifdef __linux__
1235 static void *file_ram_alloc(RAMBlock *block,
1236 ram_addr_t memory,
1237 const char *path,
1238 Error **errp)
1240 bool unlink_on_error = false;
1241 char *filename;
1242 char *sanitized_name;
1243 char *c;
1244 void *area;
1245 int fd = -1;
1246 int64_t page_size;
1248 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1249 error_setg(errp,
1250 "host lacks kvm mmu notifiers, -mem-path unsupported");
1251 return NULL;
1254 for (;;) {
1255 fd = open(path, O_RDWR);
1256 if (fd >= 0) {
1257 /* @path names an existing file, use it */
1258 break;
1260 if (errno == ENOENT) {
1261 /* @path names a file that doesn't exist, create it */
1262 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1263 if (fd >= 0) {
1264 unlink_on_error = true;
1265 break;
1267 } else if (errno == EISDIR) {
1268 /* @path names a directory, create a file there */
1269 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1270 sanitized_name = g_strdup(memory_region_name(block->mr));
1271 for (c = sanitized_name; *c != '\0'; c++) {
1272 if (*c == '/') {
1273 *c = '_';
1277 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1278 sanitized_name);
1279 g_free(sanitized_name);
1281 fd = mkstemp(filename);
1282 if (fd >= 0) {
1283 unlink(filename);
1284 g_free(filename);
1285 break;
1287 g_free(filename);
1289 if (errno != EEXIST && errno != EINTR) {
1290 error_setg_errno(errp, errno,
1291 "can't open backing store %s for guest RAM",
1292 path);
1293 goto error;
1296 * Try again on EINTR and EEXIST. The latter happens when
1297 * something else creates the file between our two open().
1301 page_size = qemu_fd_getpagesize(fd);
1302 block->mr->align = page_size;
1304 if (memory < page_size) {
1305 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1306 "or larger than page size 0x%" PRIx64,
1307 memory, page_size);
1308 goto error;
1311 memory = ROUND_UP(memory, page_size);
1314 * ftruncate is not supported by hugetlbfs in older
1315 * hosts, so don't bother bailing out on errors.
1316 * If anything goes wrong with it under other filesystems,
1317 * mmap will fail.
1319 if (ftruncate(fd, memory)) {
1320 perror("ftruncate");
1323 area = qemu_ram_mmap(fd, memory, page_size, block->flags & RAM_SHARED);
1324 if (area == MAP_FAILED) {
1325 error_setg_errno(errp, errno,
1326 "unable to map backing store for guest RAM");
1327 goto error;
1330 if (mem_prealloc) {
1331 os_mem_prealloc(fd, area, memory);
1334 block->fd = fd;
1335 return area;
1337 error:
1338 if (unlink_on_error) {
1339 unlink(path);
1341 if (fd != -1) {
1342 close(fd);
1344 return NULL;
1346 #endif
1348 /* Called with the ramlist lock held. */
1349 static ram_addr_t find_ram_offset(ram_addr_t size)
1351 RAMBlock *block, *next_block;
1352 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1354 assert(size != 0); /* it would hand out same offset multiple times */
1356 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1357 return 0;
1360 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1361 ram_addr_t end, next = RAM_ADDR_MAX;
1363 end = block->offset + block->max_length;
1365 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1366 if (next_block->offset >= end) {
1367 next = MIN(next, next_block->offset);
1370 if (next - end >= size && next - end < mingap) {
1371 offset = end;
1372 mingap = next - end;
1376 if (offset == RAM_ADDR_MAX) {
1377 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1378 (uint64_t)size);
1379 abort();
1382 return offset;
1385 ram_addr_t last_ram_offset(void)
1387 RAMBlock *block;
1388 ram_addr_t last = 0;
1390 rcu_read_lock();
1391 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1392 last = MAX(last, block->offset + block->max_length);
1394 rcu_read_unlock();
1395 return last;
1398 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1400 int ret;
1402 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1403 if (!machine_dump_guest_core(current_machine)) {
1404 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1405 if (ret) {
1406 perror("qemu_madvise");
1407 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1408 "but dump_guest_core=off specified\n");
1413 /* Called within an RCU critical section, or while the ramlist lock
1414 * is held.
1416 static RAMBlock *find_ram_block(ram_addr_t addr)
1418 RAMBlock *block;
1420 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1421 if (block->offset == addr) {
1422 return block;
1426 return NULL;
1429 const char *qemu_ram_get_idstr(RAMBlock *rb)
1431 return rb->idstr;
1434 /* Called with iothread lock held. */
1435 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1437 RAMBlock *new_block, *block;
1439 rcu_read_lock();
1440 new_block = find_ram_block(addr);
1441 assert(new_block);
1442 assert(!new_block->idstr[0]);
1444 if (dev) {
1445 char *id = qdev_get_dev_path(dev);
1446 if (id) {
1447 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1448 g_free(id);
1451 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1453 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1454 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1455 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1456 new_block->idstr);
1457 abort();
1460 rcu_read_unlock();
1463 /* Called with iothread lock held. */
1464 void qemu_ram_unset_idstr(ram_addr_t addr)
1466 RAMBlock *block;
1468 /* FIXME: arch_init.c assumes that this is not called throughout
1469 * migration. Ignore the problem since hot-unplug during migration
1470 * does not work anyway.
1473 rcu_read_lock();
1474 block = find_ram_block(addr);
1475 if (block) {
1476 memset(block->idstr, 0, sizeof(block->idstr));
1478 rcu_read_unlock();
1481 static int memory_try_enable_merging(void *addr, size_t len)
1483 if (!machine_mem_merge(current_machine)) {
1484 /* disabled by the user */
1485 return 0;
1488 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1491 /* Only legal before guest might have detected the memory size: e.g. on
1492 * incoming migration, or right after reset.
1494 * As memory core doesn't know how is memory accessed, it is up to
1495 * resize callback to update device state and/or add assertions to detect
1496 * misuse, if necessary.
1498 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1500 RAMBlock *block = find_ram_block(base);
1502 assert(block);
1504 newsize = HOST_PAGE_ALIGN(newsize);
1506 if (block->used_length == newsize) {
1507 return 0;
1510 if (!(block->flags & RAM_RESIZEABLE)) {
1511 error_setg_errno(errp, EINVAL,
1512 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1513 " in != 0x" RAM_ADDR_FMT, block->idstr,
1514 newsize, block->used_length);
1515 return -EINVAL;
1518 if (block->max_length < newsize) {
1519 error_setg_errno(errp, EINVAL,
1520 "Length too large: %s: 0x" RAM_ADDR_FMT
1521 " > 0x" RAM_ADDR_FMT, block->idstr,
1522 newsize, block->max_length);
1523 return -EINVAL;
1526 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1527 block->used_length = newsize;
1528 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1529 DIRTY_CLIENTS_ALL);
1530 memory_region_set_size(block->mr, newsize);
1531 if (block->resized) {
1532 block->resized(block->idstr, newsize, block->host);
1534 return 0;
1537 /* Called with ram_list.mutex held */
1538 static void dirty_memory_extend(ram_addr_t old_ram_size,
1539 ram_addr_t new_ram_size)
1541 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1542 DIRTY_MEMORY_BLOCK_SIZE);
1543 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1544 DIRTY_MEMORY_BLOCK_SIZE);
1545 int i;
1547 /* Only need to extend if block count increased */
1548 if (new_num_blocks <= old_num_blocks) {
1549 return;
1552 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1553 DirtyMemoryBlocks *old_blocks;
1554 DirtyMemoryBlocks *new_blocks;
1555 int j;
1557 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1558 new_blocks = g_malloc(sizeof(*new_blocks) +
1559 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1561 if (old_num_blocks) {
1562 memcpy(new_blocks->blocks, old_blocks->blocks,
1563 old_num_blocks * sizeof(old_blocks->blocks[0]));
1566 for (j = old_num_blocks; j < new_num_blocks; j++) {
1567 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1570 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1572 if (old_blocks) {
1573 g_free_rcu(old_blocks, rcu);
1578 static void ram_block_add(RAMBlock *new_block, Error **errp)
1580 RAMBlock *block;
1581 RAMBlock *last_block = NULL;
1582 ram_addr_t old_ram_size, new_ram_size;
1583 Error *err = NULL;
1585 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1587 qemu_mutex_lock_ramlist();
1588 new_block->offset = find_ram_offset(new_block->max_length);
1590 if (!new_block->host) {
1591 if (xen_enabled()) {
1592 xen_ram_alloc(new_block->offset, new_block->max_length,
1593 new_block->mr, &err);
1594 if (err) {
1595 error_propagate(errp, err);
1596 qemu_mutex_unlock_ramlist();
1597 return;
1599 } else {
1600 new_block->host = phys_mem_alloc(new_block->max_length,
1601 &new_block->mr->align);
1602 if (!new_block->host) {
1603 error_setg_errno(errp, errno,
1604 "cannot set up guest memory '%s'",
1605 memory_region_name(new_block->mr));
1606 qemu_mutex_unlock_ramlist();
1607 return;
1609 memory_try_enable_merging(new_block->host, new_block->max_length);
1613 new_ram_size = MAX(old_ram_size,
1614 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1615 if (new_ram_size > old_ram_size) {
1616 migration_bitmap_extend(old_ram_size, new_ram_size);
1617 dirty_memory_extend(old_ram_size, new_ram_size);
1619 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1620 * QLIST (which has an RCU-friendly variant) does not have insertion at
1621 * tail, so save the last element in last_block.
1623 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1624 last_block = block;
1625 if (block->max_length < new_block->max_length) {
1626 break;
1629 if (block) {
1630 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1631 } else if (last_block) {
1632 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1633 } else { /* list is empty */
1634 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1636 ram_list.mru_block = NULL;
1638 /* Write list before version */
1639 smp_wmb();
1640 ram_list.version++;
1641 qemu_mutex_unlock_ramlist();
1643 cpu_physical_memory_set_dirty_range(new_block->offset,
1644 new_block->used_length,
1645 DIRTY_CLIENTS_ALL);
1647 if (new_block->host) {
1648 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1649 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1650 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1651 if (kvm_enabled()) {
1652 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1657 #ifdef __linux__
1658 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1659 bool share, const char *mem_path,
1660 Error **errp)
1662 RAMBlock *new_block;
1663 Error *local_err = NULL;
1665 if (xen_enabled()) {
1666 error_setg(errp, "-mem-path not supported with Xen");
1667 return NULL;
1670 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1672 * file_ram_alloc() needs to allocate just like
1673 * phys_mem_alloc, but we haven't bothered to provide
1674 * a hook there.
1676 error_setg(errp,
1677 "-mem-path not supported with this accelerator");
1678 return NULL;
1681 size = HOST_PAGE_ALIGN(size);
1682 new_block = g_malloc0(sizeof(*new_block));
1683 new_block->mr = mr;
1684 new_block->used_length = size;
1685 new_block->max_length = size;
1686 new_block->flags = share ? RAM_SHARED : 0;
1687 new_block->host = file_ram_alloc(new_block, size,
1688 mem_path, errp);
1689 if (!new_block->host) {
1690 g_free(new_block);
1691 return NULL;
1694 ram_block_add(new_block, &local_err);
1695 if (local_err) {
1696 g_free(new_block);
1697 error_propagate(errp, local_err);
1698 return NULL;
1700 return new_block;
1702 #endif
1704 static
1705 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1706 void (*resized)(const char*,
1707 uint64_t length,
1708 void *host),
1709 void *host, bool resizeable,
1710 MemoryRegion *mr, Error **errp)
1712 RAMBlock *new_block;
1713 Error *local_err = NULL;
1715 size = HOST_PAGE_ALIGN(size);
1716 max_size = HOST_PAGE_ALIGN(max_size);
1717 new_block = g_malloc0(sizeof(*new_block));
1718 new_block->mr = mr;
1719 new_block->resized = resized;
1720 new_block->used_length = size;
1721 new_block->max_length = max_size;
1722 assert(max_size >= size);
1723 new_block->fd = -1;
1724 new_block->host = host;
1725 if (host) {
1726 new_block->flags |= RAM_PREALLOC;
1728 if (resizeable) {
1729 new_block->flags |= RAM_RESIZEABLE;
1731 ram_block_add(new_block, &local_err);
1732 if (local_err) {
1733 g_free(new_block);
1734 error_propagate(errp, local_err);
1735 return NULL;
1737 return new_block;
1740 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1741 MemoryRegion *mr, Error **errp)
1743 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1746 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1748 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1751 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1752 void (*resized)(const char*,
1753 uint64_t length,
1754 void *host),
1755 MemoryRegion *mr, Error **errp)
1757 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1760 static void reclaim_ramblock(RAMBlock *block)
1762 if (block->flags & RAM_PREALLOC) {
1764 } else if (xen_enabled()) {
1765 xen_invalidate_map_cache_entry(block->host);
1766 #ifndef _WIN32
1767 } else if (block->fd >= 0) {
1768 qemu_ram_munmap(block->host, block->max_length);
1769 close(block->fd);
1770 #endif
1771 } else {
1772 qemu_anon_ram_free(block->host, block->max_length);
1774 g_free(block);
1777 void qemu_ram_free(RAMBlock *block)
1779 if (!block) {
1780 return;
1783 qemu_mutex_lock_ramlist();
1784 QLIST_REMOVE_RCU(block, next);
1785 ram_list.mru_block = NULL;
1786 /* Write list before version */
1787 smp_wmb();
1788 ram_list.version++;
1789 call_rcu(block, reclaim_ramblock, rcu);
1790 qemu_mutex_unlock_ramlist();
1793 #ifndef _WIN32
1794 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1796 RAMBlock *block;
1797 ram_addr_t offset;
1798 int flags;
1799 void *area, *vaddr;
1801 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1802 offset = addr - block->offset;
1803 if (offset < block->max_length) {
1804 vaddr = ramblock_ptr(block, offset);
1805 if (block->flags & RAM_PREALLOC) {
1807 } else if (xen_enabled()) {
1808 abort();
1809 } else {
1810 flags = MAP_FIXED;
1811 if (block->fd >= 0) {
1812 flags |= (block->flags & RAM_SHARED ?
1813 MAP_SHARED : MAP_PRIVATE);
1814 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1815 flags, block->fd, offset);
1816 } else {
1818 * Remap needs to match alloc. Accelerators that
1819 * set phys_mem_alloc never remap. If they did,
1820 * we'd need a remap hook here.
1822 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1824 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1825 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1826 flags, -1, 0);
1828 if (area != vaddr) {
1829 fprintf(stderr, "Could not remap addr: "
1830 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1831 length, addr);
1832 exit(1);
1834 memory_try_enable_merging(vaddr, length);
1835 qemu_ram_setup_dump(vaddr, length);
1840 #endif /* !_WIN32 */
1842 int qemu_get_ram_fd(ram_addr_t addr)
1844 RAMBlock *block;
1845 int fd;
1847 rcu_read_lock();
1848 block = qemu_get_ram_block(addr);
1849 fd = block->fd;
1850 rcu_read_unlock();
1851 return fd;
1854 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1856 RAMBlock *block;
1858 rcu_read_lock();
1859 block = qemu_get_ram_block(addr);
1860 block->fd = fd;
1861 rcu_read_unlock();
1864 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1866 RAMBlock *block;
1867 void *ptr;
1869 rcu_read_lock();
1870 block = qemu_get_ram_block(addr);
1871 ptr = ramblock_ptr(block, 0);
1872 rcu_read_unlock();
1873 return ptr;
1876 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1877 * This should not be used for general purpose DMA. Use address_space_map
1878 * or address_space_rw instead. For local memory (e.g. video ram) that the
1879 * device owns, use memory_region_get_ram_ptr.
1881 * Called within RCU critical section.
1883 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1885 RAMBlock *block = ram_block;
1887 if (block == NULL) {
1888 block = qemu_get_ram_block(addr);
1891 if (xen_enabled() && block->host == NULL) {
1892 /* We need to check if the requested address is in the RAM
1893 * because we don't want to map the entire memory in QEMU.
1894 * In that case just map until the end of the page.
1896 if (block->offset == 0) {
1897 return xen_map_cache(addr, 0, 0);
1900 block->host = xen_map_cache(block->offset, block->max_length, 1);
1902 return ramblock_ptr(block, addr - block->offset);
1905 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1906 * but takes a size argument.
1908 * Called within RCU critical section.
1910 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1911 hwaddr *size)
1913 RAMBlock *block = ram_block;
1914 ram_addr_t offset_inside_block;
1915 if (*size == 0) {
1916 return NULL;
1919 if (block == NULL) {
1920 block = qemu_get_ram_block(addr);
1922 offset_inside_block = addr - block->offset;
1923 *size = MIN(*size, block->max_length - offset_inside_block);
1925 if (xen_enabled() && block->host == NULL) {
1926 /* We need to check if the requested address is in the RAM
1927 * because we don't want to map the entire memory in QEMU.
1928 * In that case just map the requested area.
1930 if (block->offset == 0) {
1931 return xen_map_cache(addr, *size, 1);
1934 block->host = xen_map_cache(block->offset, block->max_length, 1);
1937 return ramblock_ptr(block, offset_inside_block);
1941 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1942 * in that RAMBlock.
1944 * ptr: Host pointer to look up
1945 * round_offset: If true round the result offset down to a page boundary
1946 * *ram_addr: set to result ram_addr
1947 * *offset: set to result offset within the RAMBlock
1949 * Returns: RAMBlock (or NULL if not found)
1951 * By the time this function returns, the returned pointer is not protected
1952 * by RCU anymore. If the caller is not within an RCU critical section and
1953 * does not hold the iothread lock, it must have other means of protecting the
1954 * pointer, such as a reference to the region that includes the incoming
1955 * ram_addr_t.
1957 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1958 ram_addr_t *ram_addr,
1959 ram_addr_t *offset)
1961 RAMBlock *block;
1962 uint8_t *host = ptr;
1964 if (xen_enabled()) {
1965 rcu_read_lock();
1966 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1967 block = qemu_get_ram_block(*ram_addr);
1968 if (block) {
1969 *offset = (host - block->host);
1971 rcu_read_unlock();
1972 return block;
1975 rcu_read_lock();
1976 block = atomic_rcu_read(&ram_list.mru_block);
1977 if (block && block->host && host - block->host < block->max_length) {
1978 goto found;
1981 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1982 /* This case append when the block is not mapped. */
1983 if (block->host == NULL) {
1984 continue;
1986 if (host - block->host < block->max_length) {
1987 goto found;
1991 rcu_read_unlock();
1992 return NULL;
1994 found:
1995 *offset = (host - block->host);
1996 if (round_offset) {
1997 *offset &= TARGET_PAGE_MASK;
1999 *ram_addr = block->offset + *offset;
2000 rcu_read_unlock();
2001 return block;
2005 * Finds the named RAMBlock
2007 * name: The name of RAMBlock to find
2009 * Returns: RAMBlock (or NULL if not found)
2011 RAMBlock *qemu_ram_block_by_name(const char *name)
2013 RAMBlock *block;
2015 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2016 if (!strcmp(name, block->idstr)) {
2017 return block;
2021 return NULL;
2024 /* Some of the softmmu routines need to translate from a host pointer
2025 (typically a TLB entry) back to a ram offset. */
2026 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2028 RAMBlock *block;
2029 ram_addr_t offset; /* Not used */
2031 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2033 if (!block) {
2034 return NULL;
2037 return block->mr;
2040 /* Called within RCU critical section. */
2041 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2042 uint64_t val, unsigned size)
2044 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2045 tb_invalidate_phys_page_fast(ram_addr, size);
2047 switch (size) {
2048 case 1:
2049 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2050 break;
2051 case 2:
2052 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2053 break;
2054 case 4:
2055 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2056 break;
2057 default:
2058 abort();
2060 /* Set both VGA and migration bits for simplicity and to remove
2061 * the notdirty callback faster.
2063 cpu_physical_memory_set_dirty_range(ram_addr, size,
2064 DIRTY_CLIENTS_NOCODE);
2065 /* we remove the notdirty callback only if the code has been
2066 flushed */
2067 if (!cpu_physical_memory_is_clean(ram_addr)) {
2068 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2072 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2073 unsigned size, bool is_write)
2075 return is_write;
2078 static const MemoryRegionOps notdirty_mem_ops = {
2079 .write = notdirty_mem_write,
2080 .valid.accepts = notdirty_mem_accepts,
2081 .endianness = DEVICE_NATIVE_ENDIAN,
2084 /* Generate a debug exception if a watchpoint has been hit. */
2085 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2087 CPUState *cpu = current_cpu;
2088 CPUClass *cc = CPU_GET_CLASS(cpu);
2089 CPUArchState *env = cpu->env_ptr;
2090 target_ulong pc, cs_base;
2091 target_ulong vaddr;
2092 CPUWatchpoint *wp;
2093 uint32_t cpu_flags;
2095 if (cpu->watchpoint_hit) {
2096 /* We re-entered the check after replacing the TB. Now raise
2097 * the debug interrupt so that is will trigger after the
2098 * current instruction. */
2099 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2100 return;
2102 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2103 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2104 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2105 && (wp->flags & flags)) {
2106 if (flags == BP_MEM_READ) {
2107 wp->flags |= BP_WATCHPOINT_HIT_READ;
2108 } else {
2109 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2111 wp->hitaddr = vaddr;
2112 wp->hitattrs = attrs;
2113 if (!cpu->watchpoint_hit) {
2114 if (wp->flags & BP_CPU &&
2115 !cc->debug_check_watchpoint(cpu, wp)) {
2116 wp->flags &= ~BP_WATCHPOINT_HIT;
2117 continue;
2119 cpu->watchpoint_hit = wp;
2120 tb_check_watchpoint(cpu);
2121 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2122 cpu->exception_index = EXCP_DEBUG;
2123 cpu_loop_exit(cpu);
2124 } else {
2125 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2126 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2127 cpu_resume_from_signal(cpu, NULL);
2130 } else {
2131 wp->flags &= ~BP_WATCHPOINT_HIT;
2136 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2137 so these check for a hit then pass through to the normal out-of-line
2138 phys routines. */
2139 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2140 unsigned size, MemTxAttrs attrs)
2142 MemTxResult res;
2143 uint64_t data;
2144 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2145 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2147 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2148 switch (size) {
2149 case 1:
2150 data = address_space_ldub(as, addr, attrs, &res);
2151 break;
2152 case 2:
2153 data = address_space_lduw(as, addr, attrs, &res);
2154 break;
2155 case 4:
2156 data = address_space_ldl(as, addr, attrs, &res);
2157 break;
2158 default: abort();
2160 *pdata = data;
2161 return res;
2164 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2165 uint64_t val, unsigned size,
2166 MemTxAttrs attrs)
2168 MemTxResult res;
2169 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2170 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2172 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2173 switch (size) {
2174 case 1:
2175 address_space_stb(as, addr, val, attrs, &res);
2176 break;
2177 case 2:
2178 address_space_stw(as, addr, val, attrs, &res);
2179 break;
2180 case 4:
2181 address_space_stl(as, addr, val, attrs, &res);
2182 break;
2183 default: abort();
2185 return res;
2188 static const MemoryRegionOps watch_mem_ops = {
2189 .read_with_attrs = watch_mem_read,
2190 .write_with_attrs = watch_mem_write,
2191 .endianness = DEVICE_NATIVE_ENDIAN,
2194 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2195 unsigned len, MemTxAttrs attrs)
2197 subpage_t *subpage = opaque;
2198 uint8_t buf[8];
2199 MemTxResult res;
2201 #if defined(DEBUG_SUBPAGE)
2202 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2203 subpage, len, addr);
2204 #endif
2205 res = address_space_read(subpage->as, addr + subpage->base,
2206 attrs, buf, len);
2207 if (res) {
2208 return res;
2210 switch (len) {
2211 case 1:
2212 *data = ldub_p(buf);
2213 return MEMTX_OK;
2214 case 2:
2215 *data = lduw_p(buf);
2216 return MEMTX_OK;
2217 case 4:
2218 *data = ldl_p(buf);
2219 return MEMTX_OK;
2220 case 8:
2221 *data = ldq_p(buf);
2222 return MEMTX_OK;
2223 default:
2224 abort();
2228 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2229 uint64_t value, unsigned len, MemTxAttrs attrs)
2231 subpage_t *subpage = opaque;
2232 uint8_t buf[8];
2234 #if defined(DEBUG_SUBPAGE)
2235 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2236 " value %"PRIx64"\n",
2237 __func__, subpage, len, addr, value);
2238 #endif
2239 switch (len) {
2240 case 1:
2241 stb_p(buf, value);
2242 break;
2243 case 2:
2244 stw_p(buf, value);
2245 break;
2246 case 4:
2247 stl_p(buf, value);
2248 break;
2249 case 8:
2250 stq_p(buf, value);
2251 break;
2252 default:
2253 abort();
2255 return address_space_write(subpage->as, addr + subpage->base,
2256 attrs, buf, len);
2259 static bool subpage_accepts(void *opaque, hwaddr addr,
2260 unsigned len, bool is_write)
2262 subpage_t *subpage = opaque;
2263 #if defined(DEBUG_SUBPAGE)
2264 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2265 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2266 #endif
2268 return address_space_access_valid(subpage->as, addr + subpage->base,
2269 len, is_write);
2272 static const MemoryRegionOps subpage_ops = {
2273 .read_with_attrs = subpage_read,
2274 .write_with_attrs = subpage_write,
2275 .impl.min_access_size = 1,
2276 .impl.max_access_size = 8,
2277 .valid.min_access_size = 1,
2278 .valid.max_access_size = 8,
2279 .valid.accepts = subpage_accepts,
2280 .endianness = DEVICE_NATIVE_ENDIAN,
2283 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2284 uint16_t section)
2286 int idx, eidx;
2288 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2289 return -1;
2290 idx = SUBPAGE_IDX(start);
2291 eidx = SUBPAGE_IDX(end);
2292 #if defined(DEBUG_SUBPAGE)
2293 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2294 __func__, mmio, start, end, idx, eidx, section);
2295 #endif
2296 for (; idx <= eidx; idx++) {
2297 mmio->sub_section[idx] = section;
2300 return 0;
2303 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2305 subpage_t *mmio;
2307 mmio = g_malloc0(sizeof(subpage_t));
2309 mmio->as = as;
2310 mmio->base = base;
2311 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2312 NULL, TARGET_PAGE_SIZE);
2313 mmio->iomem.subpage = true;
2314 #if defined(DEBUG_SUBPAGE)
2315 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2316 mmio, base, TARGET_PAGE_SIZE);
2317 #endif
2318 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2320 return mmio;
2323 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2324 MemoryRegion *mr)
2326 assert(as);
2327 MemoryRegionSection section = {
2328 .address_space = as,
2329 .mr = mr,
2330 .offset_within_address_space = 0,
2331 .offset_within_region = 0,
2332 .size = int128_2_64(),
2335 return phys_section_add(map, &section);
2338 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2340 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2341 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2342 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2343 MemoryRegionSection *sections = d->map.sections;
2345 return sections[index & ~TARGET_PAGE_MASK].mr;
2348 static void io_mem_init(void)
2350 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2351 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2352 NULL, UINT64_MAX);
2353 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2354 NULL, UINT64_MAX);
2355 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2356 NULL, UINT64_MAX);
2359 static void mem_begin(MemoryListener *listener)
2361 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2362 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2363 uint16_t n;
2365 n = dummy_section(&d->map, as, &io_mem_unassigned);
2366 assert(n == PHYS_SECTION_UNASSIGNED);
2367 n = dummy_section(&d->map, as, &io_mem_notdirty);
2368 assert(n == PHYS_SECTION_NOTDIRTY);
2369 n = dummy_section(&d->map, as, &io_mem_rom);
2370 assert(n == PHYS_SECTION_ROM);
2371 n = dummy_section(&d->map, as, &io_mem_watch);
2372 assert(n == PHYS_SECTION_WATCH);
2374 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2375 d->as = as;
2376 as->next_dispatch = d;
2379 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2381 phys_sections_free(&d->map);
2382 g_free(d);
2385 static void mem_commit(MemoryListener *listener)
2387 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2388 AddressSpaceDispatch *cur = as->dispatch;
2389 AddressSpaceDispatch *next = as->next_dispatch;
2391 phys_page_compact_all(next, next->map.nodes_nb);
2393 atomic_rcu_set(&as->dispatch, next);
2394 if (cur) {
2395 call_rcu(cur, address_space_dispatch_free, rcu);
2399 static void tcg_commit(MemoryListener *listener)
2401 CPUAddressSpace *cpuas;
2402 AddressSpaceDispatch *d;
2404 /* since each CPU stores ram addresses in its TLB cache, we must
2405 reset the modified entries */
2406 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2407 cpu_reloading_memory_map();
2408 /* The CPU and TLB are protected by the iothread lock.
2409 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2410 * may have split the RCU critical section.
2412 d = atomic_rcu_read(&cpuas->as->dispatch);
2413 cpuas->memory_dispatch = d;
2414 tlb_flush(cpuas->cpu, 1);
2417 void address_space_init_dispatch(AddressSpace *as)
2419 as->dispatch = NULL;
2420 as->dispatch_listener = (MemoryListener) {
2421 .begin = mem_begin,
2422 .commit = mem_commit,
2423 .region_add = mem_add,
2424 .region_nop = mem_add,
2425 .priority = 0,
2427 memory_listener_register(&as->dispatch_listener, as);
2430 void address_space_unregister(AddressSpace *as)
2432 memory_listener_unregister(&as->dispatch_listener);
2435 void address_space_destroy_dispatch(AddressSpace *as)
2437 AddressSpaceDispatch *d = as->dispatch;
2439 atomic_rcu_set(&as->dispatch, NULL);
2440 if (d) {
2441 call_rcu(d, address_space_dispatch_free, rcu);
2445 static void memory_map_init(void)
2447 system_memory = g_malloc(sizeof(*system_memory));
2449 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2450 address_space_init(&address_space_memory, system_memory, "memory");
2452 system_io = g_malloc(sizeof(*system_io));
2453 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2454 65536);
2455 address_space_init(&address_space_io, system_io, "I/O");
2458 MemoryRegion *get_system_memory(void)
2460 return system_memory;
2463 MemoryRegion *get_system_io(void)
2465 return system_io;
2468 #endif /* !defined(CONFIG_USER_ONLY) */
2470 /* physical memory access (slow version, mainly for debug) */
2471 #if defined(CONFIG_USER_ONLY)
2472 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2473 uint8_t *buf, int len, int is_write)
2475 int l, flags;
2476 target_ulong page;
2477 void * p;
2479 while (len > 0) {
2480 page = addr & TARGET_PAGE_MASK;
2481 l = (page + TARGET_PAGE_SIZE) - addr;
2482 if (l > len)
2483 l = len;
2484 flags = page_get_flags(page);
2485 if (!(flags & PAGE_VALID))
2486 return -1;
2487 if (is_write) {
2488 if (!(flags & PAGE_WRITE))
2489 return -1;
2490 /* XXX: this code should not depend on lock_user */
2491 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2492 return -1;
2493 memcpy(p, buf, l);
2494 unlock_user(p, addr, l);
2495 } else {
2496 if (!(flags & PAGE_READ))
2497 return -1;
2498 /* XXX: this code should not depend on lock_user */
2499 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2500 return -1;
2501 memcpy(buf, p, l);
2502 unlock_user(p, addr, 0);
2504 len -= l;
2505 buf += l;
2506 addr += l;
2508 return 0;
2511 #else
2513 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2514 hwaddr length)
2516 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2517 /* No early return if dirty_log_mask is or becomes 0, because
2518 * cpu_physical_memory_set_dirty_range will still call
2519 * xen_modified_memory.
2521 if (dirty_log_mask) {
2522 dirty_log_mask =
2523 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2525 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2526 tb_invalidate_phys_range(addr, addr + length);
2527 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2529 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2532 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2534 unsigned access_size_max = mr->ops->valid.max_access_size;
2536 /* Regions are assumed to support 1-4 byte accesses unless
2537 otherwise specified. */
2538 if (access_size_max == 0) {
2539 access_size_max = 4;
2542 /* Bound the maximum access by the alignment of the address. */
2543 if (!mr->ops->impl.unaligned) {
2544 unsigned align_size_max = addr & -addr;
2545 if (align_size_max != 0 && align_size_max < access_size_max) {
2546 access_size_max = align_size_max;
2550 /* Don't attempt accesses larger than the maximum. */
2551 if (l > access_size_max) {
2552 l = access_size_max;
2554 l = pow2floor(l);
2556 return l;
2559 static bool prepare_mmio_access(MemoryRegion *mr)
2561 bool unlocked = !qemu_mutex_iothread_locked();
2562 bool release_lock = false;
2564 if (unlocked && mr->global_locking) {
2565 qemu_mutex_lock_iothread();
2566 unlocked = false;
2567 release_lock = true;
2569 if (mr->flush_coalesced_mmio) {
2570 if (unlocked) {
2571 qemu_mutex_lock_iothread();
2573 qemu_flush_coalesced_mmio_buffer();
2574 if (unlocked) {
2575 qemu_mutex_unlock_iothread();
2579 return release_lock;
2582 /* Called within RCU critical section. */
2583 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2584 MemTxAttrs attrs,
2585 const uint8_t *buf,
2586 int len, hwaddr addr1,
2587 hwaddr l, MemoryRegion *mr)
2589 uint8_t *ptr;
2590 uint64_t val;
2591 MemTxResult result = MEMTX_OK;
2592 bool release_lock = false;
2594 for (;;) {
2595 if (!memory_access_is_direct(mr, true)) {
2596 release_lock |= prepare_mmio_access(mr);
2597 l = memory_access_size(mr, l, addr1);
2598 /* XXX: could force current_cpu to NULL to avoid
2599 potential bugs */
2600 switch (l) {
2601 case 8:
2602 /* 64 bit write access */
2603 val = ldq_p(buf);
2604 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2605 attrs);
2606 break;
2607 case 4:
2608 /* 32 bit write access */
2609 val = ldl_p(buf);
2610 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2611 attrs);
2612 break;
2613 case 2:
2614 /* 16 bit write access */
2615 val = lduw_p(buf);
2616 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2617 attrs);
2618 break;
2619 case 1:
2620 /* 8 bit write access */
2621 val = ldub_p(buf);
2622 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2623 attrs);
2624 break;
2625 default:
2626 abort();
2628 } else {
2629 addr1 += memory_region_get_ram_addr(mr);
2630 /* RAM case */
2631 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2632 memcpy(ptr, buf, l);
2633 invalidate_and_set_dirty(mr, addr1, l);
2636 if (release_lock) {
2637 qemu_mutex_unlock_iothread();
2638 release_lock = false;
2641 len -= l;
2642 buf += l;
2643 addr += l;
2645 if (!len) {
2646 break;
2649 l = len;
2650 mr = address_space_translate(as, addr, &addr1, &l, true);
2653 return result;
2656 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2657 const uint8_t *buf, int len)
2659 hwaddr l;
2660 hwaddr addr1;
2661 MemoryRegion *mr;
2662 MemTxResult result = MEMTX_OK;
2664 if (len > 0) {
2665 rcu_read_lock();
2666 l = len;
2667 mr = address_space_translate(as, addr, &addr1, &l, true);
2668 result = address_space_write_continue(as, addr, attrs, buf, len,
2669 addr1, l, mr);
2670 rcu_read_unlock();
2673 return result;
2676 /* Called within RCU critical section. */
2677 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2678 MemTxAttrs attrs, uint8_t *buf,
2679 int len, hwaddr addr1, hwaddr l,
2680 MemoryRegion *mr)
2682 uint8_t *ptr;
2683 uint64_t val;
2684 MemTxResult result = MEMTX_OK;
2685 bool release_lock = false;
2687 for (;;) {
2688 if (!memory_access_is_direct(mr, false)) {
2689 /* I/O case */
2690 release_lock |= prepare_mmio_access(mr);
2691 l = memory_access_size(mr, l, addr1);
2692 switch (l) {
2693 case 8:
2694 /* 64 bit read access */
2695 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2696 attrs);
2697 stq_p(buf, val);
2698 break;
2699 case 4:
2700 /* 32 bit read access */
2701 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2702 attrs);
2703 stl_p(buf, val);
2704 break;
2705 case 2:
2706 /* 16 bit read access */
2707 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2708 attrs);
2709 stw_p(buf, val);
2710 break;
2711 case 1:
2712 /* 8 bit read access */
2713 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2714 attrs);
2715 stb_p(buf, val);
2716 break;
2717 default:
2718 abort();
2720 } else {
2721 /* RAM case */
2722 ptr = qemu_get_ram_ptr(mr->ram_block,
2723 memory_region_get_ram_addr(mr) + addr1);
2724 memcpy(buf, ptr, l);
2727 if (release_lock) {
2728 qemu_mutex_unlock_iothread();
2729 release_lock = false;
2732 len -= l;
2733 buf += l;
2734 addr += l;
2736 if (!len) {
2737 break;
2740 l = len;
2741 mr = address_space_translate(as, addr, &addr1, &l, false);
2744 return result;
2747 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2748 MemTxAttrs attrs, uint8_t *buf, int len)
2750 hwaddr l;
2751 hwaddr addr1;
2752 MemoryRegion *mr;
2753 MemTxResult result = MEMTX_OK;
2755 if (len > 0) {
2756 rcu_read_lock();
2757 l = len;
2758 mr = address_space_translate(as, addr, &addr1, &l, false);
2759 result = address_space_read_continue(as, addr, attrs, buf, len,
2760 addr1, l, mr);
2761 rcu_read_unlock();
2764 return result;
2767 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2768 uint8_t *buf, int len, bool is_write)
2770 if (is_write) {
2771 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2772 } else {
2773 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2777 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2778 int len, int is_write)
2780 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2781 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2789 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2790 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2792 hwaddr l;
2793 uint8_t *ptr;
2794 hwaddr addr1;
2795 MemoryRegion *mr;
2797 rcu_read_lock();
2798 while (len > 0) {
2799 l = len;
2800 mr = address_space_translate(as, addr, &addr1, &l, true);
2802 if (!(memory_region_is_ram(mr) ||
2803 memory_region_is_romd(mr))) {
2804 l = memory_access_size(mr, l, addr1);
2805 } else {
2806 addr1 += memory_region_get_ram_addr(mr);
2807 /* ROM/RAM case */
2808 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2809 switch (type) {
2810 case WRITE_DATA:
2811 memcpy(ptr, buf, l);
2812 invalidate_and_set_dirty(mr, addr1, l);
2813 break;
2814 case FLUSH_CACHE:
2815 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2816 break;
2819 len -= l;
2820 buf += l;
2821 addr += l;
2823 rcu_read_unlock();
2826 /* used for ROM loading : can write in RAM and ROM */
2827 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2828 const uint8_t *buf, int len)
2830 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2833 void cpu_flush_icache_range(hwaddr start, int len)
2836 * This function should do the same thing as an icache flush that was
2837 * triggered from within the guest. For TCG we are always cache coherent,
2838 * so there is no need to flush anything. For KVM / Xen we need to flush
2839 * the host's instruction cache at least.
2841 if (tcg_enabled()) {
2842 return;
2845 cpu_physical_memory_write_rom_internal(&address_space_memory,
2846 start, NULL, len, FLUSH_CACHE);
2849 typedef struct {
2850 MemoryRegion *mr;
2851 void *buffer;
2852 hwaddr addr;
2853 hwaddr len;
2854 bool in_use;
2855 } BounceBuffer;
2857 static BounceBuffer bounce;
2859 typedef struct MapClient {
2860 QEMUBH *bh;
2861 QLIST_ENTRY(MapClient) link;
2862 } MapClient;
2864 QemuMutex map_client_list_lock;
2865 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2866 = QLIST_HEAD_INITIALIZER(map_client_list);
2868 static void cpu_unregister_map_client_do(MapClient *client)
2870 QLIST_REMOVE(client, link);
2871 g_free(client);
2874 static void cpu_notify_map_clients_locked(void)
2876 MapClient *client;
2878 while (!QLIST_EMPTY(&map_client_list)) {
2879 client = QLIST_FIRST(&map_client_list);
2880 qemu_bh_schedule(client->bh);
2881 cpu_unregister_map_client_do(client);
2885 void cpu_register_map_client(QEMUBH *bh)
2887 MapClient *client = g_malloc(sizeof(*client));
2889 qemu_mutex_lock(&map_client_list_lock);
2890 client->bh = bh;
2891 QLIST_INSERT_HEAD(&map_client_list, client, link);
2892 if (!atomic_read(&bounce.in_use)) {
2893 cpu_notify_map_clients_locked();
2895 qemu_mutex_unlock(&map_client_list_lock);
2898 void cpu_exec_init_all(void)
2900 qemu_mutex_init(&ram_list.mutex);
2901 io_mem_init();
2902 memory_map_init();
2903 qemu_mutex_init(&map_client_list_lock);
2906 void cpu_unregister_map_client(QEMUBH *bh)
2908 MapClient *client;
2910 qemu_mutex_lock(&map_client_list_lock);
2911 QLIST_FOREACH(client, &map_client_list, link) {
2912 if (client->bh == bh) {
2913 cpu_unregister_map_client_do(client);
2914 break;
2917 qemu_mutex_unlock(&map_client_list_lock);
2920 static void cpu_notify_map_clients(void)
2922 qemu_mutex_lock(&map_client_list_lock);
2923 cpu_notify_map_clients_locked();
2924 qemu_mutex_unlock(&map_client_list_lock);
2927 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2929 MemoryRegion *mr;
2930 hwaddr l, xlat;
2932 rcu_read_lock();
2933 while (len > 0) {
2934 l = len;
2935 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2936 if (!memory_access_is_direct(mr, is_write)) {
2937 l = memory_access_size(mr, l, addr);
2938 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2939 return false;
2943 len -= l;
2944 addr += l;
2946 rcu_read_unlock();
2947 return true;
2950 /* Map a physical memory region into a host virtual address.
2951 * May map a subset of the requested range, given by and returned in *plen.
2952 * May return NULL if resources needed to perform the mapping are exhausted.
2953 * Use only for reads OR writes - not for read-modify-write operations.
2954 * Use cpu_register_map_client() to know when retrying the map operation is
2955 * likely to succeed.
2957 void *address_space_map(AddressSpace *as,
2958 hwaddr addr,
2959 hwaddr *plen,
2960 bool is_write)
2962 hwaddr len = *plen;
2963 hwaddr done = 0;
2964 hwaddr l, xlat, base;
2965 MemoryRegion *mr, *this_mr;
2966 ram_addr_t raddr;
2967 void *ptr;
2969 if (len == 0) {
2970 return NULL;
2973 l = len;
2974 rcu_read_lock();
2975 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2977 if (!memory_access_is_direct(mr, is_write)) {
2978 if (atomic_xchg(&bounce.in_use, true)) {
2979 rcu_read_unlock();
2980 return NULL;
2982 /* Avoid unbounded allocations */
2983 l = MIN(l, TARGET_PAGE_SIZE);
2984 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2985 bounce.addr = addr;
2986 bounce.len = l;
2988 memory_region_ref(mr);
2989 bounce.mr = mr;
2990 if (!is_write) {
2991 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2992 bounce.buffer, l);
2995 rcu_read_unlock();
2996 *plen = l;
2997 return bounce.buffer;
3000 base = xlat;
3001 raddr = memory_region_get_ram_addr(mr);
3003 for (;;) {
3004 len -= l;
3005 addr += l;
3006 done += l;
3007 if (len == 0) {
3008 break;
3011 l = len;
3012 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3013 if (this_mr != mr || xlat != base + done) {
3014 break;
3018 memory_region_ref(mr);
3019 *plen = done;
3020 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3021 rcu_read_unlock();
3023 return ptr;
3026 /* Unmaps a memory region previously mapped by address_space_map().
3027 * Will also mark the memory as dirty if is_write == 1. access_len gives
3028 * the amount of memory that was actually read or written by the caller.
3030 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3031 int is_write, hwaddr access_len)
3033 if (buffer != bounce.buffer) {
3034 MemoryRegion *mr;
3035 ram_addr_t addr1;
3037 mr = qemu_ram_addr_from_host(buffer, &addr1);
3038 assert(mr != NULL);
3039 if (is_write) {
3040 invalidate_and_set_dirty(mr, addr1, access_len);
3042 if (xen_enabled()) {
3043 xen_invalidate_map_cache_entry(buffer);
3045 memory_region_unref(mr);
3046 return;
3048 if (is_write) {
3049 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3050 bounce.buffer, access_len);
3052 qemu_vfree(bounce.buffer);
3053 bounce.buffer = NULL;
3054 memory_region_unref(bounce.mr);
3055 atomic_mb_set(&bounce.in_use, false);
3056 cpu_notify_map_clients();
3059 void *cpu_physical_memory_map(hwaddr addr,
3060 hwaddr *plen,
3061 int is_write)
3063 return address_space_map(&address_space_memory, addr, plen, is_write);
3066 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3067 int is_write, hwaddr access_len)
3069 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3072 /* warning: addr must be aligned */
3073 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3074 MemTxAttrs attrs,
3075 MemTxResult *result,
3076 enum device_endian endian)
3078 uint8_t *ptr;
3079 uint64_t val;
3080 MemoryRegion *mr;
3081 hwaddr l = 4;
3082 hwaddr addr1;
3083 MemTxResult r;
3084 bool release_lock = false;
3086 rcu_read_lock();
3087 mr = address_space_translate(as, addr, &addr1, &l, false);
3088 if (l < 4 || !memory_access_is_direct(mr, false)) {
3089 release_lock |= prepare_mmio_access(mr);
3091 /* I/O case */
3092 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3093 #if defined(TARGET_WORDS_BIGENDIAN)
3094 if (endian == DEVICE_LITTLE_ENDIAN) {
3095 val = bswap32(val);
3097 #else
3098 if (endian == DEVICE_BIG_ENDIAN) {
3099 val = bswap32(val);
3101 #endif
3102 } else {
3103 /* RAM case */
3104 ptr = qemu_get_ram_ptr(mr->ram_block,
3105 (memory_region_get_ram_addr(mr)
3106 & TARGET_PAGE_MASK)
3107 + addr1);
3108 switch (endian) {
3109 case DEVICE_LITTLE_ENDIAN:
3110 val = ldl_le_p(ptr);
3111 break;
3112 case DEVICE_BIG_ENDIAN:
3113 val = ldl_be_p(ptr);
3114 break;
3115 default:
3116 val = ldl_p(ptr);
3117 break;
3119 r = MEMTX_OK;
3121 if (result) {
3122 *result = r;
3124 if (release_lock) {
3125 qemu_mutex_unlock_iothread();
3127 rcu_read_unlock();
3128 return val;
3131 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3132 MemTxAttrs attrs, MemTxResult *result)
3134 return address_space_ldl_internal(as, addr, attrs, result,
3135 DEVICE_NATIVE_ENDIAN);
3138 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3139 MemTxAttrs attrs, MemTxResult *result)
3141 return address_space_ldl_internal(as, addr, attrs, result,
3142 DEVICE_LITTLE_ENDIAN);
3145 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3146 MemTxAttrs attrs, MemTxResult *result)
3148 return address_space_ldl_internal(as, addr, attrs, result,
3149 DEVICE_BIG_ENDIAN);
3152 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3154 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3157 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3159 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3162 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3164 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3167 /* warning: addr must be aligned */
3168 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3169 MemTxAttrs attrs,
3170 MemTxResult *result,
3171 enum device_endian endian)
3173 uint8_t *ptr;
3174 uint64_t val;
3175 MemoryRegion *mr;
3176 hwaddr l = 8;
3177 hwaddr addr1;
3178 MemTxResult r;
3179 bool release_lock = false;
3181 rcu_read_lock();
3182 mr = address_space_translate(as, addr, &addr1, &l,
3183 false);
3184 if (l < 8 || !memory_access_is_direct(mr, false)) {
3185 release_lock |= prepare_mmio_access(mr);
3187 /* I/O case */
3188 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3189 #if defined(TARGET_WORDS_BIGENDIAN)
3190 if (endian == DEVICE_LITTLE_ENDIAN) {
3191 val = bswap64(val);
3193 #else
3194 if (endian == DEVICE_BIG_ENDIAN) {
3195 val = bswap64(val);
3197 #endif
3198 } else {
3199 /* RAM case */
3200 ptr = qemu_get_ram_ptr(mr->ram_block,
3201 (memory_region_get_ram_addr(mr)
3202 & TARGET_PAGE_MASK)
3203 + addr1);
3204 switch (endian) {
3205 case DEVICE_LITTLE_ENDIAN:
3206 val = ldq_le_p(ptr);
3207 break;
3208 case DEVICE_BIG_ENDIAN:
3209 val = ldq_be_p(ptr);
3210 break;
3211 default:
3212 val = ldq_p(ptr);
3213 break;
3215 r = MEMTX_OK;
3217 if (result) {
3218 *result = r;
3220 if (release_lock) {
3221 qemu_mutex_unlock_iothread();
3223 rcu_read_unlock();
3224 return val;
3227 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3228 MemTxAttrs attrs, MemTxResult *result)
3230 return address_space_ldq_internal(as, addr, attrs, result,
3231 DEVICE_NATIVE_ENDIAN);
3234 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3235 MemTxAttrs attrs, MemTxResult *result)
3237 return address_space_ldq_internal(as, addr, attrs, result,
3238 DEVICE_LITTLE_ENDIAN);
3241 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3242 MemTxAttrs attrs, MemTxResult *result)
3244 return address_space_ldq_internal(as, addr, attrs, result,
3245 DEVICE_BIG_ENDIAN);
3248 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3250 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3253 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3255 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3258 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3260 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3263 /* XXX: optimize */
3264 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3265 MemTxAttrs attrs, MemTxResult *result)
3267 uint8_t val;
3268 MemTxResult r;
3270 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3271 if (result) {
3272 *result = r;
3274 return val;
3277 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3279 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3282 /* warning: addr must be aligned */
3283 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3284 hwaddr addr,
3285 MemTxAttrs attrs,
3286 MemTxResult *result,
3287 enum device_endian endian)
3289 uint8_t *ptr;
3290 uint64_t val;
3291 MemoryRegion *mr;
3292 hwaddr l = 2;
3293 hwaddr addr1;
3294 MemTxResult r;
3295 bool release_lock = false;
3297 rcu_read_lock();
3298 mr = address_space_translate(as, addr, &addr1, &l,
3299 false);
3300 if (l < 2 || !memory_access_is_direct(mr, false)) {
3301 release_lock |= prepare_mmio_access(mr);
3303 /* I/O case */
3304 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3305 #if defined(TARGET_WORDS_BIGENDIAN)
3306 if (endian == DEVICE_LITTLE_ENDIAN) {
3307 val = bswap16(val);
3309 #else
3310 if (endian == DEVICE_BIG_ENDIAN) {
3311 val = bswap16(val);
3313 #endif
3314 } else {
3315 /* RAM case */
3316 ptr = qemu_get_ram_ptr(mr->ram_block,
3317 (memory_region_get_ram_addr(mr)
3318 & TARGET_PAGE_MASK)
3319 + addr1);
3320 switch (endian) {
3321 case DEVICE_LITTLE_ENDIAN:
3322 val = lduw_le_p(ptr);
3323 break;
3324 case DEVICE_BIG_ENDIAN:
3325 val = lduw_be_p(ptr);
3326 break;
3327 default:
3328 val = lduw_p(ptr);
3329 break;
3331 r = MEMTX_OK;
3333 if (result) {
3334 *result = r;
3336 if (release_lock) {
3337 qemu_mutex_unlock_iothread();
3339 rcu_read_unlock();
3340 return val;
3343 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3344 MemTxAttrs attrs, MemTxResult *result)
3346 return address_space_lduw_internal(as, addr, attrs, result,
3347 DEVICE_NATIVE_ENDIAN);
3350 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3351 MemTxAttrs attrs, MemTxResult *result)
3353 return address_space_lduw_internal(as, addr, attrs, result,
3354 DEVICE_LITTLE_ENDIAN);
3357 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3358 MemTxAttrs attrs, MemTxResult *result)
3360 return address_space_lduw_internal(as, addr, attrs, result,
3361 DEVICE_BIG_ENDIAN);
3364 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3366 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3369 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3371 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3374 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3376 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3379 /* warning: addr must be aligned. The ram page is not masked as dirty
3380 and the code inside is not invalidated. It is useful if the dirty
3381 bits are used to track modified PTEs */
3382 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3383 MemTxAttrs attrs, MemTxResult *result)
3385 uint8_t *ptr;
3386 MemoryRegion *mr;
3387 hwaddr l = 4;
3388 hwaddr addr1;
3389 MemTxResult r;
3390 uint8_t dirty_log_mask;
3391 bool release_lock = false;
3393 rcu_read_lock();
3394 mr = address_space_translate(as, addr, &addr1, &l,
3395 true);
3396 if (l < 4 || !memory_access_is_direct(mr, true)) {
3397 release_lock |= prepare_mmio_access(mr);
3399 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3400 } else {
3401 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3402 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3403 stl_p(ptr, val);
3405 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3406 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3407 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3408 r = MEMTX_OK;
3410 if (result) {
3411 *result = r;
3413 if (release_lock) {
3414 qemu_mutex_unlock_iothread();
3416 rcu_read_unlock();
3419 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3421 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3424 /* warning: addr must be aligned */
3425 static inline void address_space_stl_internal(AddressSpace *as,
3426 hwaddr addr, uint32_t val,
3427 MemTxAttrs attrs,
3428 MemTxResult *result,
3429 enum device_endian endian)
3431 uint8_t *ptr;
3432 MemoryRegion *mr;
3433 hwaddr l = 4;
3434 hwaddr addr1;
3435 MemTxResult r;
3436 bool release_lock = false;
3438 rcu_read_lock();
3439 mr = address_space_translate(as, addr, &addr1, &l,
3440 true);
3441 if (l < 4 || !memory_access_is_direct(mr, true)) {
3442 release_lock |= prepare_mmio_access(mr);
3444 #if defined(TARGET_WORDS_BIGENDIAN)
3445 if (endian == DEVICE_LITTLE_ENDIAN) {
3446 val = bswap32(val);
3448 #else
3449 if (endian == DEVICE_BIG_ENDIAN) {
3450 val = bswap32(val);
3452 #endif
3453 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3454 } else {
3455 /* RAM case */
3456 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3457 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3458 switch (endian) {
3459 case DEVICE_LITTLE_ENDIAN:
3460 stl_le_p(ptr, val);
3461 break;
3462 case DEVICE_BIG_ENDIAN:
3463 stl_be_p(ptr, val);
3464 break;
3465 default:
3466 stl_p(ptr, val);
3467 break;
3469 invalidate_and_set_dirty(mr, addr1, 4);
3470 r = MEMTX_OK;
3472 if (result) {
3473 *result = r;
3475 if (release_lock) {
3476 qemu_mutex_unlock_iothread();
3478 rcu_read_unlock();
3481 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3482 MemTxAttrs attrs, MemTxResult *result)
3484 address_space_stl_internal(as, addr, val, attrs, result,
3485 DEVICE_NATIVE_ENDIAN);
3488 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3489 MemTxAttrs attrs, MemTxResult *result)
3491 address_space_stl_internal(as, addr, val, attrs, result,
3492 DEVICE_LITTLE_ENDIAN);
3495 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3496 MemTxAttrs attrs, MemTxResult *result)
3498 address_space_stl_internal(as, addr, val, attrs, result,
3499 DEVICE_BIG_ENDIAN);
3502 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3504 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3507 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3509 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3512 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3514 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3517 /* XXX: optimize */
3518 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3519 MemTxAttrs attrs, MemTxResult *result)
3521 uint8_t v = val;
3522 MemTxResult r;
3524 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3525 if (result) {
3526 *result = r;
3530 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3532 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3535 /* warning: addr must be aligned */
3536 static inline void address_space_stw_internal(AddressSpace *as,
3537 hwaddr addr, uint32_t val,
3538 MemTxAttrs attrs,
3539 MemTxResult *result,
3540 enum device_endian endian)
3542 uint8_t *ptr;
3543 MemoryRegion *mr;
3544 hwaddr l = 2;
3545 hwaddr addr1;
3546 MemTxResult r;
3547 bool release_lock = false;
3549 rcu_read_lock();
3550 mr = address_space_translate(as, addr, &addr1, &l, true);
3551 if (l < 2 || !memory_access_is_direct(mr, true)) {
3552 release_lock |= prepare_mmio_access(mr);
3554 #if defined(TARGET_WORDS_BIGENDIAN)
3555 if (endian == DEVICE_LITTLE_ENDIAN) {
3556 val = bswap16(val);
3558 #else
3559 if (endian == DEVICE_BIG_ENDIAN) {
3560 val = bswap16(val);
3562 #endif
3563 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3564 } else {
3565 /* RAM case */
3566 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3567 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3568 switch (endian) {
3569 case DEVICE_LITTLE_ENDIAN:
3570 stw_le_p(ptr, val);
3571 break;
3572 case DEVICE_BIG_ENDIAN:
3573 stw_be_p(ptr, val);
3574 break;
3575 default:
3576 stw_p(ptr, val);
3577 break;
3579 invalidate_and_set_dirty(mr, addr1, 2);
3580 r = MEMTX_OK;
3582 if (result) {
3583 *result = r;
3585 if (release_lock) {
3586 qemu_mutex_unlock_iothread();
3588 rcu_read_unlock();
3591 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3592 MemTxAttrs attrs, MemTxResult *result)
3594 address_space_stw_internal(as, addr, val, attrs, result,
3595 DEVICE_NATIVE_ENDIAN);
3598 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3599 MemTxAttrs attrs, MemTxResult *result)
3601 address_space_stw_internal(as, addr, val, attrs, result,
3602 DEVICE_LITTLE_ENDIAN);
3605 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3606 MemTxAttrs attrs, MemTxResult *result)
3608 address_space_stw_internal(as, addr, val, attrs, result,
3609 DEVICE_BIG_ENDIAN);
3612 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3614 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3617 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3619 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3622 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3624 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3627 /* XXX: optimize */
3628 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3629 MemTxAttrs attrs, MemTxResult *result)
3631 MemTxResult r;
3632 val = tswap64(val);
3633 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3634 if (result) {
3635 *result = r;
3639 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3640 MemTxAttrs attrs, MemTxResult *result)
3642 MemTxResult r;
3643 val = cpu_to_le64(val);
3644 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3645 if (result) {
3646 *result = r;
3649 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3650 MemTxAttrs attrs, MemTxResult *result)
3652 MemTxResult r;
3653 val = cpu_to_be64(val);
3654 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3655 if (result) {
3656 *result = r;
3660 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3662 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3665 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3667 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3670 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3672 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3675 /* virtual memory access for debug (includes writing to ROM) */
3676 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3677 uint8_t *buf, int len, int is_write)
3679 int l;
3680 hwaddr phys_addr;
3681 target_ulong page;
3683 while (len > 0) {
3684 int asidx;
3685 MemTxAttrs attrs;
3687 page = addr & TARGET_PAGE_MASK;
3688 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3689 asidx = cpu_asidx_from_attrs(cpu, attrs);
3690 /* if no physical page mapped, return an error */
3691 if (phys_addr == -1)
3692 return -1;
3693 l = (page + TARGET_PAGE_SIZE) - addr;
3694 if (l > len)
3695 l = len;
3696 phys_addr += (addr & ~TARGET_PAGE_MASK);
3697 if (is_write) {
3698 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3699 phys_addr, buf, l);
3700 } else {
3701 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3702 MEMTXATTRS_UNSPECIFIED,
3703 buf, l, 0);
3705 len -= l;
3706 buf += l;
3707 addr += l;
3709 return 0;
3713 * Allows code that needs to deal with migration bitmaps etc to still be built
3714 * target independent.
3716 size_t qemu_target_page_bits(void)
3718 return TARGET_PAGE_BITS;
3721 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3737 #ifndef CONFIG_USER_ONLY
3738 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3740 MemoryRegion*mr;
3741 hwaddr l = 1;
3742 bool res;
3744 rcu_read_lock();
3745 mr = address_space_translate(&address_space_memory,
3746 phys_addr, &phys_addr, &l, false);
3748 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3749 rcu_read_unlock();
3750 return res;
3753 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3755 RAMBlock *block;
3756 int ret = 0;
3758 rcu_read_lock();
3759 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3760 ret = func(block->idstr, block->host, block->offset,
3761 block->used_length, opaque);
3762 if (ret) {
3763 break;
3766 rcu_read_unlock();
3767 return ret;
3769 #endif