9pfs: factor out virtio_9p_push_and_notify
[qemu.git] / exec.c
blob8718a75b64a5c9147f9ef699e6769ad3f4c6a9a3
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
58 #include "qemu/range.h"
59 #ifndef _WIN32
60 #include "qemu/mmap-alloc.h"
61 #endif
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
66 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
67 * are protected by the ramlist lock.
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)
91 #endif
93 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
94 /* current CPU in the current thread. It is only valid inside
95 cpu_exec() */
96 __thread CPUState *current_cpu;
97 /* 0 = Do not count executed instructions.
98 1 = Precise instruction counting.
99 2 = Adaptive rate instruction counting. */
100 int use_icount;
102 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page radix tree, packed into 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
113 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
115 /* Size of the L2 (and L3, etc) page tables. */
116 #define ADDR_SPACE_BITS 64
118 #define P_L2_BITS 9
119 #define P_L2_SIZE (1 << P_L2_BITS)
121 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
123 typedef PhysPageEntry Node[P_L2_SIZE];
125 typedef struct PhysPageMap {
126 struct rcu_head rcu;
128 unsigned sections_nb;
129 unsigned sections_nb_alloc;
130 unsigned nodes_nb;
131 unsigned nodes_nb_alloc;
132 Node *nodes;
133 MemoryRegionSection *sections;
134 } PhysPageMap;
136 struct AddressSpaceDispatch {
137 struct rcu_head rcu;
139 /* This is a multi-level map on the physical address space.
140 * The bottom level has pointers to MemoryRegionSections.
142 PhysPageEntry phys_map;
143 PhysPageMap map;
144 AddressSpace *as;
147 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
148 typedef struct subpage_t {
149 MemoryRegion iomem;
150 AddressSpace *as;
151 hwaddr base;
152 uint16_t sub_section[TARGET_PAGE_SIZE];
153 } subpage_t;
155 #define PHYS_SECTION_UNASSIGNED 0
156 #define PHYS_SECTION_NOTDIRTY 1
157 #define PHYS_SECTION_ROM 2
158 #define PHYS_SECTION_WATCH 3
160 static void io_mem_init(void);
161 static void memory_map_init(void);
162 static void tcg_commit(MemoryListener *listener);
164 static MemoryRegion io_mem_watch;
167 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
168 * @cpu: the CPU whose AddressSpace this is
169 * @as: the AddressSpace itself
170 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
171 * @tcg_as_listener: listener for tracking changes to the AddressSpace
173 struct CPUAddressSpace {
174 CPUState *cpu;
175 AddressSpace *as;
176 struct AddressSpaceDispatch *memory_dispatch;
177 MemoryListener tcg_as_listener;
180 #endif
182 #if !defined(CONFIG_USER_ONLY)
184 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
186 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
188 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
189 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
193 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
195 unsigned i;
196 uint32_t ret;
197 PhysPageEntry e;
198 PhysPageEntry *p;
200 ret = map->nodes_nb++;
201 p = map->nodes[ret];
202 assert(ret != PHYS_MAP_NODE_NIL);
203 assert(ret != map->nodes_nb_alloc);
205 e.skip = leaf ? 0 : 1;
206 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
207 for (i = 0; i < P_L2_SIZE; ++i) {
208 memcpy(&p[i], &e, sizeof(e));
210 return ret;
213 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
214 hwaddr *index, hwaddr *nb, uint16_t leaf,
215 int level)
217 PhysPageEntry *p;
218 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
220 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
221 lp->ptr = phys_map_node_alloc(map, level == 0);
223 p = map->nodes[lp->ptr];
224 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
226 while (*nb && lp < &p[P_L2_SIZE]) {
227 if ((*index & (step - 1)) == 0 && *nb >= step) {
228 lp->skip = 0;
229 lp->ptr = leaf;
230 *index += step;
231 *nb -= step;
232 } else {
233 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
235 ++lp;
239 static void phys_page_set(AddressSpaceDispatch *d,
240 hwaddr index, hwaddr nb,
241 uint16_t leaf)
243 /* Wildly overreserve - it doesn't matter much. */
244 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
246 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
249 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
250 * and update our entry so we can skip it and go directly to the destination.
252 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
254 unsigned valid_ptr = P_L2_SIZE;
255 int valid = 0;
256 PhysPageEntry *p;
257 int i;
259 if (lp->ptr == PHYS_MAP_NODE_NIL) {
260 return;
263 p = nodes[lp->ptr];
264 for (i = 0; i < P_L2_SIZE; i++) {
265 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
266 continue;
269 valid_ptr = i;
270 valid++;
271 if (p[i].skip) {
272 phys_page_compact(&p[i], nodes, compacted);
276 /* We can only compress if there's only one child. */
277 if (valid != 1) {
278 return;
281 assert(valid_ptr < P_L2_SIZE);
283 /* Don't compress if it won't fit in the # of bits we have. */
284 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
285 return;
288 lp->ptr = p[valid_ptr].ptr;
289 if (!p[valid_ptr].skip) {
290 /* If our only child is a leaf, make this a leaf. */
291 /* By design, we should have made this node a leaf to begin with so we
292 * should never reach here.
293 * But since it's so simple to handle this, let's do it just in case we
294 * change this rule.
296 lp->skip = 0;
297 } else {
298 lp->skip += p[valid_ptr].skip;
302 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
304 DECLARE_BITMAP(compacted, nodes_nb);
306 if (d->phys_map.skip) {
307 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
311 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
312 Node *nodes, MemoryRegionSection *sections)
314 PhysPageEntry *p;
315 hwaddr index = addr >> TARGET_PAGE_BITS;
316 int i;
318 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
319 if (lp.ptr == PHYS_MAP_NODE_NIL) {
320 return &sections[PHYS_SECTION_UNASSIGNED];
322 p = nodes[lp.ptr];
323 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
326 if (sections[lp.ptr].size.hi ||
327 range_covers_byte(sections[lp.ptr].offset_within_address_space,
328 sections[lp.ptr].size.lo, addr)) {
329 return &sections[lp.ptr];
330 } else {
331 return &sections[PHYS_SECTION_UNASSIGNED];
335 bool memory_region_is_unassigned(MemoryRegion *mr)
337 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
338 && mr != &io_mem_watch;
341 /* Called from RCU critical section */
342 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
343 hwaddr addr,
344 bool resolve_subpage)
346 MemoryRegionSection *section;
347 subpage_t *subpage;
349 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
350 if (resolve_subpage && section->mr->subpage) {
351 subpage = container_of(section->mr, subpage_t, iomem);
352 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
354 return section;
357 /* Called from RCU critical section */
358 static MemoryRegionSection *
359 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
360 hwaddr *plen, bool resolve_subpage)
362 MemoryRegionSection *section;
363 MemoryRegion *mr;
364 Int128 diff;
366 section = address_space_lookup_region(d, addr, resolve_subpage);
367 /* Compute offset within MemoryRegionSection */
368 addr -= section->offset_within_address_space;
370 /* Compute offset within MemoryRegion */
371 *xlat = addr + section->offset_within_region;
373 mr = section->mr;
375 /* MMIO registers can be expected to perform full-width accesses based only
376 * on their address, without considering adjacent registers that could
377 * decode to completely different MemoryRegions. When such registers
378 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
379 * regions overlap wildly. For this reason we cannot clamp the accesses
380 * here.
382 * If the length is small (as is the case for address_space_ldl/stl),
383 * everything works fine. If the incoming length is large, however,
384 * the caller really has to do the clamping through memory_access_size.
386 if (memory_region_is_ram(mr)) {
387 diff = int128_sub(section->size, int128_make64(addr));
388 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
390 return section;
393 /* Called from RCU critical section */
394 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
395 hwaddr *xlat, hwaddr *plen,
396 bool is_write)
398 IOMMUTLBEntry iotlb;
399 MemoryRegionSection *section;
400 MemoryRegion *mr;
402 for (;;) {
403 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
404 section = address_space_translate_internal(d, addr, &addr, plen, true);
405 mr = section->mr;
407 if (!mr->iommu_ops) {
408 break;
411 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
412 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
413 | (addr & iotlb.addr_mask));
414 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
415 if (!(iotlb.perm & (1 << is_write))) {
416 mr = &io_mem_unassigned;
417 break;
420 as = iotlb.target_as;
423 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
424 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
425 *plen = MIN(page, *plen);
428 *xlat = addr;
429 return mr;
432 /* Called from RCU critical section */
433 MemoryRegionSection *
434 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
435 hwaddr *xlat, hwaddr *plen)
437 MemoryRegionSection *section;
438 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
439 addr, xlat, plen, false);
441 assert(!section->mr->iommu_ops);
442 return section;
444 #endif
446 #if !defined(CONFIG_USER_ONLY)
448 static int cpu_common_post_load(void *opaque, int version_id)
450 CPUState *cpu = opaque;
452 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
453 version_id is increased. */
454 cpu->interrupt_request &= ~0x01;
455 tlb_flush(cpu, 1);
457 return 0;
460 static int cpu_common_pre_load(void *opaque)
462 CPUState *cpu = opaque;
464 cpu->exception_index = -1;
466 return 0;
469 static bool cpu_common_exception_index_needed(void *opaque)
471 CPUState *cpu = opaque;
473 return tcg_enabled() && cpu->exception_index != -1;
476 static const VMStateDescription vmstate_cpu_common_exception_index = {
477 .name = "cpu_common/exception_index",
478 .version_id = 1,
479 .minimum_version_id = 1,
480 .needed = cpu_common_exception_index_needed,
481 .fields = (VMStateField[]) {
482 VMSTATE_INT32(exception_index, CPUState),
483 VMSTATE_END_OF_LIST()
487 static bool cpu_common_crash_occurred_needed(void *opaque)
489 CPUState *cpu = opaque;
491 return cpu->crash_occurred;
494 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
495 .name = "cpu_common/crash_occurred",
496 .version_id = 1,
497 .minimum_version_id = 1,
498 .needed = cpu_common_crash_occurred_needed,
499 .fields = (VMStateField[]) {
500 VMSTATE_BOOL(crash_occurred, CPUState),
501 VMSTATE_END_OF_LIST()
505 const VMStateDescription vmstate_cpu_common = {
506 .name = "cpu_common",
507 .version_id = 1,
508 .minimum_version_id = 1,
509 .pre_load = cpu_common_pre_load,
510 .post_load = cpu_common_post_load,
511 .fields = (VMStateField[]) {
512 VMSTATE_UINT32(halted, CPUState),
513 VMSTATE_UINT32(interrupt_request, CPUState),
514 VMSTATE_END_OF_LIST()
516 .subsections = (const VMStateDescription*[]) {
517 &vmstate_cpu_common_exception_index,
518 &vmstate_cpu_common_crash_occurred,
519 NULL
523 #endif
525 CPUState *qemu_get_cpu(int index)
527 CPUState *cpu;
529 CPU_FOREACH(cpu) {
530 if (cpu->cpu_index == index) {
531 return cpu;
535 return NULL;
538 #if !defined(CONFIG_USER_ONLY)
539 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
541 /* We only support one address space per cpu at the moment. */
542 assert(cpu->as == as);
544 if (cpu->cpu_ases) {
545 /* We've already registered the listener for our only AS */
546 return;
549 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
550 cpu->cpu_ases[0].cpu = cpu;
551 cpu->cpu_ases[0].as = as;
552 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
553 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
555 #endif
557 #ifndef CONFIG_USER_ONLY
558 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
560 static int cpu_get_free_index(Error **errp)
562 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
564 if (cpu >= MAX_CPUMASK_BITS) {
565 error_setg(errp, "Trying to use more CPUs than max of %d",
566 MAX_CPUMASK_BITS);
567 return -1;
570 bitmap_set(cpu_index_map, cpu, 1);
571 return cpu;
574 void cpu_exec_exit(CPUState *cpu)
576 if (cpu->cpu_index == -1) {
577 /* cpu_index was never allocated by this @cpu or was already freed. */
578 return;
581 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
582 cpu->cpu_index = -1;
584 #else
586 static int cpu_get_free_index(Error **errp)
588 CPUState *some_cpu;
589 int cpu_index = 0;
591 CPU_FOREACH(some_cpu) {
592 cpu_index++;
594 return cpu_index;
597 void cpu_exec_exit(CPUState *cpu)
600 #endif
602 void cpu_exec_init(CPUState *cpu, Error **errp)
604 CPUClass *cc = CPU_GET_CLASS(cpu);
605 int cpu_index;
606 Error *local_err = NULL;
608 #ifndef CONFIG_USER_ONLY
609 cpu->as = &address_space_memory;
610 cpu->thread_id = qemu_get_thread_id();
611 #endif
613 #if defined(CONFIG_USER_ONLY)
614 cpu_list_lock();
615 #endif
616 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
617 if (local_err) {
618 error_propagate(errp, local_err);
619 #if defined(CONFIG_USER_ONLY)
620 cpu_list_unlock();
621 #endif
622 return;
624 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
625 #if defined(CONFIG_USER_ONLY)
626 cpu_list_unlock();
627 #endif
628 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
629 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
631 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
632 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
633 cpu_save, cpu_load, cpu->env_ptr);
634 assert(cc->vmsd == NULL);
635 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
636 #endif
637 if (cc->vmsd != NULL) {
638 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
642 #if defined(CONFIG_USER_ONLY)
643 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
645 tb_invalidate_phys_page_range(pc, pc + 1, 0);
647 #else
648 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
650 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
651 if (phys != -1) {
652 tb_invalidate_phys_addr(cpu->as,
653 phys | (pc & ~TARGET_PAGE_MASK));
656 #endif
658 #if defined(CONFIG_USER_ONLY)
659 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
664 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
665 int flags)
667 return -ENOSYS;
670 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
674 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
675 int flags, CPUWatchpoint **watchpoint)
677 return -ENOSYS;
679 #else
680 /* Add a watchpoint. */
681 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
682 int flags, CPUWatchpoint **watchpoint)
684 CPUWatchpoint *wp;
686 /* forbid ranges which are empty or run off the end of the address space */
687 if (len == 0 || (addr + len - 1) < addr) {
688 error_report("tried to set invalid watchpoint at %"
689 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
690 return -EINVAL;
692 wp = g_malloc(sizeof(*wp));
694 wp->vaddr = addr;
695 wp->len = len;
696 wp->flags = flags;
698 /* keep all GDB-injected watchpoints in front */
699 if (flags & BP_GDB) {
700 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
701 } else {
702 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
705 tlb_flush_page(cpu, addr);
707 if (watchpoint)
708 *watchpoint = wp;
709 return 0;
712 /* Remove a specific watchpoint. */
713 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
714 int flags)
716 CPUWatchpoint *wp;
718 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
719 if (addr == wp->vaddr && len == wp->len
720 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
721 cpu_watchpoint_remove_by_ref(cpu, wp);
722 return 0;
725 return -ENOENT;
728 /* Remove a specific watchpoint by reference. */
729 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
731 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
733 tlb_flush_page(cpu, watchpoint->vaddr);
735 g_free(watchpoint);
738 /* Remove all matching watchpoints. */
739 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
741 CPUWatchpoint *wp, *next;
743 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
744 if (wp->flags & mask) {
745 cpu_watchpoint_remove_by_ref(cpu, wp);
750 /* Return true if this watchpoint address matches the specified
751 * access (ie the address range covered by the watchpoint overlaps
752 * partially or completely with the address range covered by the
753 * access).
755 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
756 vaddr addr,
757 vaddr len)
759 /* We know the lengths are non-zero, but a little caution is
760 * required to avoid errors in the case where the range ends
761 * exactly at the top of the address space and so addr + len
762 * wraps round to zero.
764 vaddr wpend = wp->vaddr + wp->len - 1;
765 vaddr addrend = addr + len - 1;
767 return !(addr > wpend || wp->vaddr > addrend);
770 #endif
772 /* Add a breakpoint. */
773 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
774 CPUBreakpoint **breakpoint)
776 CPUBreakpoint *bp;
778 bp = g_malloc(sizeof(*bp));
780 bp->pc = pc;
781 bp->flags = flags;
783 /* keep all GDB-injected breakpoints in front */
784 if (flags & BP_GDB) {
785 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
786 } else {
787 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
790 breakpoint_invalidate(cpu, pc);
792 if (breakpoint) {
793 *breakpoint = bp;
795 return 0;
798 /* Remove a specific breakpoint. */
799 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
801 CPUBreakpoint *bp;
803 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
804 if (bp->pc == pc && bp->flags == flags) {
805 cpu_breakpoint_remove_by_ref(cpu, bp);
806 return 0;
809 return -ENOENT;
812 /* Remove a specific breakpoint by reference. */
813 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
815 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
817 breakpoint_invalidate(cpu, breakpoint->pc);
819 g_free(breakpoint);
822 /* Remove all matching breakpoints. */
823 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
825 CPUBreakpoint *bp, *next;
827 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
828 if (bp->flags & mask) {
829 cpu_breakpoint_remove_by_ref(cpu, bp);
834 /* enable or disable single step mode. EXCP_DEBUG is returned by the
835 CPU loop after each instruction */
836 void cpu_single_step(CPUState *cpu, int enabled)
838 if (cpu->singlestep_enabled != enabled) {
839 cpu->singlestep_enabled = enabled;
840 if (kvm_enabled()) {
841 kvm_update_guest_debug(cpu, 0);
842 } else {
843 /* must flush all the translated code to avoid inconsistencies */
844 /* XXX: only flush what is necessary */
845 tb_flush(cpu);
850 void cpu_abort(CPUState *cpu, const char *fmt, ...)
852 va_list ap;
853 va_list ap2;
855 va_start(ap, fmt);
856 va_copy(ap2, ap);
857 fprintf(stderr, "qemu: fatal: ");
858 vfprintf(stderr, fmt, ap);
859 fprintf(stderr, "\n");
860 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
861 if (qemu_log_separate()) {
862 qemu_log("qemu: fatal: ");
863 qemu_log_vprintf(fmt, ap2);
864 qemu_log("\n");
865 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
866 qemu_log_flush();
867 qemu_log_close();
869 va_end(ap2);
870 va_end(ap);
871 replay_finish();
872 #if defined(CONFIG_USER_ONLY)
874 struct sigaction act;
875 sigfillset(&act.sa_mask);
876 act.sa_handler = SIG_DFL;
877 sigaction(SIGABRT, &act, NULL);
879 #endif
880 abort();
883 #if !defined(CONFIG_USER_ONLY)
884 /* Called from RCU critical section */
885 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
887 RAMBlock *block;
889 block = atomic_rcu_read(&ram_list.mru_block);
890 if (block && addr - block->offset < block->max_length) {
891 return block;
893 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
894 if (addr - block->offset < block->max_length) {
895 goto found;
899 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
900 abort();
902 found:
903 /* It is safe to write mru_block outside the iothread lock. This
904 * is what happens:
906 * mru_block = xxx
907 * rcu_read_unlock()
908 * xxx removed from list
909 * rcu_read_lock()
910 * read mru_block
911 * mru_block = NULL;
912 * call_rcu(reclaim_ramblock, xxx);
913 * rcu_read_unlock()
915 * atomic_rcu_set is not needed here. The block was already published
916 * when it was placed into the list. Here we're just making an extra
917 * copy of the pointer.
919 ram_list.mru_block = block;
920 return block;
923 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
925 CPUState *cpu;
926 ram_addr_t start1;
927 RAMBlock *block;
928 ram_addr_t end;
930 end = TARGET_PAGE_ALIGN(start + length);
931 start &= TARGET_PAGE_MASK;
933 rcu_read_lock();
934 block = qemu_get_ram_block(start);
935 assert(block == qemu_get_ram_block(end - 1));
936 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
937 CPU_FOREACH(cpu) {
938 tlb_reset_dirty(cpu, start1, length);
940 rcu_read_unlock();
943 /* Note: start and end must be within the same ram block. */
944 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
945 ram_addr_t length,
946 unsigned client)
948 unsigned long end, page;
949 bool dirty;
951 if (length == 0) {
952 return false;
955 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
956 page = start >> TARGET_PAGE_BITS;
957 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
958 page, end - page);
960 if (dirty && tcg_enabled()) {
961 tlb_reset_dirty_range_all(start, length);
964 return dirty;
967 /* Called from RCU critical section */
968 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
969 MemoryRegionSection *section,
970 target_ulong vaddr,
971 hwaddr paddr, hwaddr xlat,
972 int prot,
973 target_ulong *address)
975 hwaddr iotlb;
976 CPUWatchpoint *wp;
978 if (memory_region_is_ram(section->mr)) {
979 /* Normal RAM. */
980 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
981 + xlat;
982 if (!section->readonly) {
983 iotlb |= PHYS_SECTION_NOTDIRTY;
984 } else {
985 iotlb |= PHYS_SECTION_ROM;
987 } else {
988 AddressSpaceDispatch *d;
990 d = atomic_rcu_read(&section->address_space->dispatch);
991 iotlb = section - d->map.sections;
992 iotlb += xlat;
995 /* Make accesses to pages with watchpoints go via the
996 watchpoint trap routines. */
997 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
998 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
999 /* Avoid trapping reads of pages with a write breakpoint. */
1000 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1001 iotlb = PHYS_SECTION_WATCH + paddr;
1002 *address |= TLB_MMIO;
1003 break;
1008 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
1012 #if !defined(CONFIG_USER_ONLY)
1014 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1015 uint16_t section);
1016 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1018 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1019 qemu_anon_ram_alloc;
1022 * Set a custom physical guest memory alloator.
1023 * Accelerators with unusual needs may need this. Hopefully, we can
1024 * get rid of it eventually.
1026 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1028 phys_mem_alloc = alloc;
1031 static uint16_t phys_section_add(PhysPageMap *map,
1032 MemoryRegionSection *section)
1034 /* The physical section number is ORed with a page-aligned
1035 * pointer to produce the iotlb entries. Thus it should
1036 * never overflow into the page-aligned value.
1038 assert(map->sections_nb < TARGET_PAGE_SIZE);
1040 if (map->sections_nb == map->sections_nb_alloc) {
1041 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1042 map->sections = g_renew(MemoryRegionSection, map->sections,
1043 map->sections_nb_alloc);
1045 map->sections[map->sections_nb] = *section;
1046 memory_region_ref(section->mr);
1047 return map->sections_nb++;
1050 static void phys_section_destroy(MemoryRegion *mr)
1052 bool have_sub_page = mr->subpage;
1054 memory_region_unref(mr);
1056 if (have_sub_page) {
1057 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1058 object_unref(OBJECT(&subpage->iomem));
1059 g_free(subpage);
1063 static void phys_sections_free(PhysPageMap *map)
1065 while (map->sections_nb > 0) {
1066 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1067 phys_section_destroy(section->mr);
1069 g_free(map->sections);
1070 g_free(map->nodes);
1073 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1075 subpage_t *subpage;
1076 hwaddr base = section->offset_within_address_space
1077 & TARGET_PAGE_MASK;
1078 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1079 d->map.nodes, d->map.sections);
1080 MemoryRegionSection subsection = {
1081 .offset_within_address_space = base,
1082 .size = int128_make64(TARGET_PAGE_SIZE),
1084 hwaddr start, end;
1086 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1088 if (!(existing->mr->subpage)) {
1089 subpage = subpage_init(d->as, base);
1090 subsection.address_space = d->as;
1091 subsection.mr = &subpage->iomem;
1092 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1093 phys_section_add(&d->map, &subsection));
1094 } else {
1095 subpage = container_of(existing->mr, subpage_t, iomem);
1097 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1098 end = start + int128_get64(section->size) - 1;
1099 subpage_register(subpage, start, end,
1100 phys_section_add(&d->map, section));
1104 static void register_multipage(AddressSpaceDispatch *d,
1105 MemoryRegionSection *section)
1107 hwaddr start_addr = section->offset_within_address_space;
1108 uint16_t section_index = phys_section_add(&d->map, section);
1109 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1110 TARGET_PAGE_BITS));
1112 assert(num_pages);
1113 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1116 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1118 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1119 AddressSpaceDispatch *d = as->next_dispatch;
1120 MemoryRegionSection now = *section, remain = *section;
1121 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1123 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1124 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1125 - now.offset_within_address_space;
1127 now.size = int128_min(int128_make64(left), now.size);
1128 register_subpage(d, &now);
1129 } else {
1130 now.size = int128_zero();
1132 while (int128_ne(remain.size, now.size)) {
1133 remain.size = int128_sub(remain.size, now.size);
1134 remain.offset_within_address_space += int128_get64(now.size);
1135 remain.offset_within_region += int128_get64(now.size);
1136 now = remain;
1137 if (int128_lt(remain.size, page_size)) {
1138 register_subpage(d, &now);
1139 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1140 now.size = page_size;
1141 register_subpage(d, &now);
1142 } else {
1143 now.size = int128_and(now.size, int128_neg(page_size));
1144 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; a no-op when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}
1155 void qemu_mutex_lock_ramlist(void)
1157 qemu_mutex_lock(&ram_list.mutex);
1160 void qemu_mutex_unlock_ramlist(void)
1162 qemu_mutex_unlock(&ram_list.mutex);
1165 #ifdef __linux__
1167 #include <sys/vfs.h>
1169 #define HUGETLBFS_MAGIC 0x958458f6
1171 static long gethugepagesize(const char *path, Error **errp)
1173 struct statfs fs;
1174 int ret;
1176 do {
1177 ret = statfs(path, &fs);
1178 } while (ret != 0 && errno == EINTR);
1180 if (ret != 0) {
1181 error_setg_errno(errp, errno, "failed to get page size of file %s",
1182 path);
1183 return 0;
1186 return fs.f_bsize;
1189 static void *file_ram_alloc(RAMBlock *block,
1190 ram_addr_t memory,
1191 const char *path,
1192 Error **errp)
1194 struct stat st;
1195 char *filename;
1196 char *sanitized_name;
1197 char *c;
1198 void *area;
1199 int fd;
1200 uint64_t hpagesize;
1201 Error *local_err = NULL;
1203 hpagesize = gethugepagesize(path, &local_err);
1204 if (local_err) {
1205 error_propagate(errp, local_err);
1206 goto error;
1208 block->mr->align = hpagesize;
1210 if (memory < hpagesize) {
1211 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1212 "or larger than huge page size 0x%" PRIx64,
1213 memory, hpagesize);
1214 goto error;
1217 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1218 error_setg(errp,
1219 "host lacks kvm mmu notifiers, -mem-path unsupported");
1220 goto error;
1223 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1224 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1225 sanitized_name = g_strdup(memory_region_name(block->mr));
1226 for (c = sanitized_name; *c != '\0'; c++) {
1227 if (*c == '/') {
1228 *c = '_';
1232 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1233 sanitized_name);
1234 g_free(sanitized_name);
1236 fd = mkstemp(filename);
1237 if (fd >= 0) {
1238 unlink(filename);
1240 g_free(filename);
1241 } else {
1242 fd = open(path, O_RDWR | O_CREAT, 0644);
1245 if (fd < 0) {
1246 error_setg_errno(errp, errno,
1247 "unable to create backing store for hugepages");
1248 goto error;
1251 memory = ROUND_UP(memory, hpagesize);
1254 * ftruncate is not supported by hugetlbfs in older
1255 * hosts, so don't bother bailing out on errors.
1256 * If anything goes wrong with it under other filesystems,
1257 * mmap will fail.
1259 if (ftruncate(fd, memory)) {
1260 perror("ftruncate");
1263 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1264 if (area == MAP_FAILED) {
1265 error_setg_errno(errp, errno,
1266 "unable to map backing store for hugepages");
1267 close(fd);
1268 goto error;
1271 if (mem_prealloc) {
1272 os_mem_prealloc(fd, area, memory);
1275 block->fd = fd;
1276 return area;
1278 error:
1279 return NULL;
1281 #endif
1283 /* Called with the ramlist lock held. */
1284 static ram_addr_t find_ram_offset(ram_addr_t size)
1286 RAMBlock *block, *next_block;
1287 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1289 assert(size != 0); /* it would hand out same offset multiple times */
1291 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1292 return 0;
1295 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1296 ram_addr_t end, next = RAM_ADDR_MAX;
1298 end = block->offset + block->max_length;
1300 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1301 if (next_block->offset >= end) {
1302 next = MIN(next, next_block->offset);
1305 if (next - end >= size && next - end < mingap) {
1306 offset = end;
1307 mingap = next - end;
1311 if (offset == RAM_ADDR_MAX) {
1312 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1313 (uint64_t)size);
1314 abort();
1317 return offset;
1320 ram_addr_t last_ram_offset(void)
1322 RAMBlock *block;
1323 ram_addr_t last = 0;
1325 rcu_read_lock();
1326 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1327 last = MAX(last, block->offset + block->max_length);
1329 rcu_read_unlock();
1330 return last;
1333 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1335 int ret;
1337 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1338 if (!machine_dump_guest_core(current_machine)) {
1339 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1340 if (ret) {
1341 perror("qemu_madvise");
1342 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1343 "but dump_guest_core=off specified\n");
1348 /* Called within an RCU critical section, or while the ramlist lock
1349 * is held.
1351 static RAMBlock *find_ram_block(ram_addr_t addr)
1353 RAMBlock *block;
1355 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1356 if (block->offset == addr) {
1357 return block;
1361 return NULL;
1364 const char *qemu_ram_get_idstr(RAMBlock *rb)
1366 return rb->idstr;
1369 /* Called with iothread lock held. */
1370 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1372 RAMBlock *new_block, *block;
1374 rcu_read_lock();
1375 new_block = find_ram_block(addr);
1376 assert(new_block);
1377 assert(!new_block->idstr[0]);
1379 if (dev) {
1380 char *id = qdev_get_dev_path(dev);
1381 if (id) {
1382 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1383 g_free(id);
1386 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1388 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1389 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1390 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1391 new_block->idstr);
1392 abort();
1395 rcu_read_unlock();
1398 /* Called with iothread lock held. */
1399 void qemu_ram_unset_idstr(ram_addr_t addr)
1401 RAMBlock *block;
1403 /* FIXME: arch_init.c assumes that this is not called throughout
1404 * migration. Ignore the problem since hot-unplug during migration
1405 * does not work anyway.
1408 rcu_read_lock();
1409 block = find_ram_block(addr);
1410 if (block) {
1411 memset(block->idstr, 0, sizeof(block->idstr));
1413 rcu_read_unlock();
1416 static int memory_try_enable_merging(void *addr, size_t len)
1418 if (!machine_mem_merge(current_machine)) {
1419 /* disabled by the user */
1420 return 0;
1423 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1426 /* Only legal before guest might have detected the memory size: e.g. on
1427 * incoming migration, or right after reset.
1429 * As memory core doesn't know how is memory accessed, it is up to
1430 * resize callback to update device state and/or add assertions to detect
1431 * misuse, if necessary.
1433 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1435 RAMBlock *block = find_ram_block(base);
1437 assert(block);
1439 newsize = HOST_PAGE_ALIGN(newsize);
1441 if (block->used_length == newsize) {
1442 return 0;
1445 if (!(block->flags & RAM_RESIZEABLE)) {
1446 error_setg_errno(errp, EINVAL,
1447 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1448 " in != 0x" RAM_ADDR_FMT, block->idstr,
1449 newsize, block->used_length);
1450 return -EINVAL;
1453 if (block->max_length < newsize) {
1454 error_setg_errno(errp, EINVAL,
1455 "Length too large: %s: 0x" RAM_ADDR_FMT
1456 " > 0x" RAM_ADDR_FMT, block->idstr,
1457 newsize, block->max_length);
1458 return -EINVAL;
1461 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1462 block->used_length = newsize;
1463 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1464 DIRTY_CLIENTS_ALL);
1465 memory_region_set_size(block->mr, newsize);
1466 if (block->resized) {
1467 block->resized(block->idstr, newsize, block->host);
1469 return 0;
1472 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1474 RAMBlock *block;
1475 RAMBlock *last_block = NULL;
1476 ram_addr_t old_ram_size, new_ram_size;
1478 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1480 qemu_mutex_lock_ramlist();
1481 new_block->offset = find_ram_offset(new_block->max_length);
1483 if (!new_block->host) {
1484 if (xen_enabled()) {
1485 xen_ram_alloc(new_block->offset, new_block->max_length,
1486 new_block->mr);
1487 } else {
1488 new_block->host = phys_mem_alloc(new_block->max_length,
1489 &new_block->mr->align);
1490 if (!new_block->host) {
1491 error_setg_errno(errp, errno,
1492 "cannot set up guest memory '%s'",
1493 memory_region_name(new_block->mr));
1494 qemu_mutex_unlock_ramlist();
1495 return -1;
1497 memory_try_enable_merging(new_block->host, new_block->max_length);
1501 new_ram_size = MAX(old_ram_size,
1502 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1503 if (new_ram_size > old_ram_size) {
1504 migration_bitmap_extend(old_ram_size, new_ram_size);
1506 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1507 * QLIST (which has an RCU-friendly variant) does not have insertion at
1508 * tail, so save the last element in last_block.
1510 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1511 last_block = block;
1512 if (block->max_length < new_block->max_length) {
1513 break;
1516 if (block) {
1517 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1518 } else if (last_block) {
1519 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1520 } else { /* list is empty */
1521 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1523 ram_list.mru_block = NULL;
1525 /* Write list before version */
1526 smp_wmb();
1527 ram_list.version++;
1528 qemu_mutex_unlock_ramlist();
1530 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1532 if (new_ram_size > old_ram_size) {
1533 int i;
1535 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1536 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1537 ram_list.dirty_memory[i] =
1538 bitmap_zero_extend(ram_list.dirty_memory[i],
1539 old_ram_size, new_ram_size);
1542 cpu_physical_memory_set_dirty_range(new_block->offset,
1543 new_block->used_length,
1544 DIRTY_CLIENTS_ALL);
1546 if (new_block->host) {
1547 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1548 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1549 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1550 if (kvm_enabled()) {
1551 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1555 return new_block->offset;
1558 #ifdef __linux__
1559 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1560 bool share, const char *mem_path,
1561 Error **errp)
1563 RAMBlock *new_block;
1564 ram_addr_t addr;
1565 Error *local_err = NULL;
1567 if (xen_enabled()) {
1568 error_setg(errp, "-mem-path not supported with Xen");
1569 return -1;
1572 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1574 * file_ram_alloc() needs to allocate just like
1575 * phys_mem_alloc, but we haven't bothered to provide
1576 * a hook there.
1578 error_setg(errp,
1579 "-mem-path not supported with this accelerator");
1580 return -1;
1583 size = HOST_PAGE_ALIGN(size);
1584 new_block = g_malloc0(sizeof(*new_block));
1585 new_block->mr = mr;
1586 new_block->used_length = size;
1587 new_block->max_length = size;
1588 new_block->flags = share ? RAM_SHARED : 0;
1589 new_block->host = file_ram_alloc(new_block, size,
1590 mem_path, errp);
1591 if (!new_block->host) {
1592 g_free(new_block);
1593 return -1;
1596 addr = ram_block_add(new_block, &local_err);
1597 if (local_err) {
1598 g_free(new_block);
1599 error_propagate(errp, local_err);
1600 return -1;
1602 return addr;
1604 #endif
1606 static
1607 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1608 void (*resized)(const char*,
1609 uint64_t length,
1610 void *host),
1611 void *host, bool resizeable,
1612 MemoryRegion *mr, Error **errp)
1614 RAMBlock *new_block;
1615 ram_addr_t addr;
1616 Error *local_err = NULL;
1618 size = HOST_PAGE_ALIGN(size);
1619 max_size = HOST_PAGE_ALIGN(max_size);
1620 new_block = g_malloc0(sizeof(*new_block));
1621 new_block->mr = mr;
1622 new_block->resized = resized;
1623 new_block->used_length = size;
1624 new_block->max_length = max_size;
1625 assert(max_size >= size);
1626 new_block->fd = -1;
1627 new_block->host = host;
1628 if (host) {
1629 new_block->flags |= RAM_PREALLOC;
1631 if (resizeable) {
1632 new_block->flags |= RAM_RESIZEABLE;
1634 addr = ram_block_add(new_block, &local_err);
1635 if (local_err) {
1636 g_free(new_block);
1637 error_propagate(errp, local_err);
1638 return -1;
1640 return addr;
1643 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1644 MemoryRegion *mr, Error **errp)
1646 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1649 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1651 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1654 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1655 void (*resized)(const char*,
1656 uint64_t length,
1657 void *host),
1658 MemoryRegion *mr, Error **errp)
1660 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1663 static void reclaim_ramblock(RAMBlock *block)
1665 if (block->flags & RAM_PREALLOC) {
1667 } else if (xen_enabled()) {
1668 xen_invalidate_map_cache_entry(block->host);
1669 #ifndef _WIN32
1670 } else if (block->fd >= 0) {
1671 qemu_ram_munmap(block->host, block->max_length);
1672 close(block->fd);
1673 #endif
1674 } else {
1675 qemu_anon_ram_free(block->host, block->max_length);
1677 g_free(block);
1680 void qemu_ram_free(ram_addr_t addr)
1682 RAMBlock *block;
1684 qemu_mutex_lock_ramlist();
1685 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1686 if (addr == block->offset) {
1687 QLIST_REMOVE_RCU(block, next);
1688 ram_list.mru_block = NULL;
1689 /* Write list before version */
1690 smp_wmb();
1691 ram_list.version++;
1692 call_rcu(block, reclaim_ramblock, rcu);
1693 break;
1696 qemu_mutex_unlock_ramlist();
1699 #ifndef _WIN32
1700 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1702 RAMBlock *block;
1703 ram_addr_t offset;
1704 int flags;
1705 void *area, *vaddr;
1707 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1708 offset = addr - block->offset;
1709 if (offset < block->max_length) {
1710 vaddr = ramblock_ptr(block, offset);
1711 if (block->flags & RAM_PREALLOC) {
1713 } else if (xen_enabled()) {
1714 abort();
1715 } else {
1716 flags = MAP_FIXED;
1717 if (block->fd >= 0) {
1718 flags |= (block->flags & RAM_SHARED ?
1719 MAP_SHARED : MAP_PRIVATE);
1720 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1721 flags, block->fd, offset);
1722 } else {
1724 * Remap needs to match alloc. Accelerators that
1725 * set phys_mem_alloc never remap. If they did,
1726 * we'd need a remap hook here.
1728 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1730 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1731 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1732 flags, -1, 0);
1734 if (area != vaddr) {
1735 fprintf(stderr, "Could not remap addr: "
1736 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1737 length, addr);
1738 exit(1);
1740 memory_try_enable_merging(vaddr, length);
1741 qemu_ram_setup_dump(vaddr, length);
1746 #endif /* !_WIN32 */
1748 int qemu_get_ram_fd(ram_addr_t addr)
1750 RAMBlock *block;
1751 int fd;
1753 rcu_read_lock();
1754 block = qemu_get_ram_block(addr);
1755 fd = block->fd;
1756 rcu_read_unlock();
1757 return fd;
1760 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1762 RAMBlock *block;
1763 void *ptr;
1765 rcu_read_lock();
1766 block = qemu_get_ram_block(addr);
1767 ptr = ramblock_ptr(block, 0);
1768 rcu_read_unlock();
1769 return ptr;
1772 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1773 * This should not be used for general purpose DMA. Use address_space_map
1774 * or address_space_rw instead. For local memory (e.g. video ram) that the
1775 * device owns, use memory_region_get_ram_ptr.
1777 * Called within RCU critical section.
1779 void *qemu_get_ram_ptr(ram_addr_t addr)
1781 RAMBlock *block = qemu_get_ram_block(addr);
1783 if (xen_enabled() && block->host == NULL) {
1784 /* We need to check if the requested address is in the RAM
1785 * because we don't want to map the entire memory in QEMU.
1786 * In that case just map until the end of the page.
1788 if (block->offset == 0) {
1789 return xen_map_cache(addr, 0, 0);
1792 block->host = xen_map_cache(block->offset, block->max_length, 1);
1794 return ramblock_ptr(block, addr - block->offset);
1797 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1798 * but takes a size argument.
1800 * Called within RCU critical section.
1802 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1804 RAMBlock *block;
1805 ram_addr_t offset_inside_block;
1806 if (*size == 0) {
1807 return NULL;
1810 block = qemu_get_ram_block(addr);
1811 offset_inside_block = addr - block->offset;
1812 *size = MIN(*size, block->max_length - offset_inside_block);
1814 if (xen_enabled() && block->host == NULL) {
1815 /* We need to check if the requested address is in the RAM
1816 * because we don't want to map the entire memory in QEMU.
1817 * In that case just map the requested area.
1819 if (block->offset == 0) {
1820 return xen_map_cache(addr, *size, 1);
1823 block->host = xen_map_cache(block->offset, block->max_length, 1);
1826 return ramblock_ptr(block, offset_inside_block);
1830 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1831 * in that RAMBlock.
1833 * ptr: Host pointer to look up
1834 * round_offset: If true round the result offset down to a page boundary
1835 * *ram_addr: set to result ram_addr
1836 * *offset: set to result offset within the RAMBlock
1838 * Returns: RAMBlock (or NULL if not found)
1840 * By the time this function returns, the returned pointer is not protected
1841 * by RCU anymore. If the caller is not within an RCU critical section and
1842 * does not hold the iothread lock, it must have other means of protecting the
1843 * pointer, such as a reference to the region that includes the incoming
1844 * ram_addr_t.
1846 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1847 ram_addr_t *ram_addr,
1848 ram_addr_t *offset)
1850 RAMBlock *block;
1851 uint8_t *host = ptr;
1853 if (xen_enabled()) {
1854 rcu_read_lock();
1855 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1856 block = qemu_get_ram_block(*ram_addr);
1857 if (block) {
1858 *offset = (host - block->host);
1860 rcu_read_unlock();
1861 return block;
1864 rcu_read_lock();
1865 block = atomic_rcu_read(&ram_list.mru_block);
1866 if (block && block->host && host - block->host < block->max_length) {
1867 goto found;
1870 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1871 /* This case append when the block is not mapped. */
1872 if (block->host == NULL) {
1873 continue;
1875 if (host - block->host < block->max_length) {
1876 goto found;
1880 rcu_read_unlock();
1881 return NULL;
1883 found:
1884 *offset = (host - block->host);
1885 if (round_offset) {
1886 *offset &= TARGET_PAGE_MASK;
1888 *ram_addr = block->offset + *offset;
1889 rcu_read_unlock();
1890 return block;
1894 * Finds the named RAMBlock
1896 * name: The name of RAMBlock to find
1898 * Returns: RAMBlock (or NULL if not found)
1900 RAMBlock *qemu_ram_block_by_name(const char *name)
1902 RAMBlock *block;
1904 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1905 if (!strcmp(name, block->idstr)) {
1906 return block;
1910 return NULL;
1913 /* Some of the softmmu routines need to translate from a host pointer
1914 (typically a TLB entry) back to a ram offset. */
1915 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1917 RAMBlock *block;
1918 ram_addr_t offset; /* Not used */
1920 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1922 if (!block) {
1923 return NULL;
1926 return block->mr;
1929 /* Called within RCU critical section. */
1930 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1931 uint64_t val, unsigned size)
1933 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1934 tb_invalidate_phys_page_fast(ram_addr, size);
1936 switch (size) {
1937 case 1:
1938 stb_p(qemu_get_ram_ptr(ram_addr), val);
1939 break;
1940 case 2:
1941 stw_p(qemu_get_ram_ptr(ram_addr), val);
1942 break;
1943 case 4:
1944 stl_p(qemu_get_ram_ptr(ram_addr), val);
1945 break;
1946 default:
1947 abort();
1949 /* Set both VGA and migration bits for simplicity and to remove
1950 * the notdirty callback faster.
1952 cpu_physical_memory_set_dirty_range(ram_addr, size,
1953 DIRTY_CLIENTS_NOCODE);
1954 /* we remove the notdirty callback only if the code has been
1955 flushed */
1956 if (!cpu_physical_memory_is_clean(ram_addr)) {
1957 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1961 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1962 unsigned size, bool is_write)
1964 return is_write;
1967 static const MemoryRegionOps notdirty_mem_ops = {
1968 .write = notdirty_mem_write,
1969 .valid.accepts = notdirty_mem_accepts,
1970 .endianness = DEVICE_NATIVE_ENDIAN,
1973 /* Generate a debug exception if a watchpoint has been hit. */
1974 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1976 CPUState *cpu = current_cpu;
1977 CPUArchState *env = cpu->env_ptr;
1978 target_ulong pc, cs_base;
1979 target_ulong vaddr;
1980 CPUWatchpoint *wp;
1981 int cpu_flags;
1983 if (cpu->watchpoint_hit) {
1984 /* We re-entered the check after replacing the TB. Now raise
1985 * the debug interrupt so that is will trigger after the
1986 * current instruction. */
1987 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1988 return;
1990 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1991 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1992 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1993 && (wp->flags & flags)) {
1994 if (flags == BP_MEM_READ) {
1995 wp->flags |= BP_WATCHPOINT_HIT_READ;
1996 } else {
1997 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1999 wp->hitaddr = vaddr;
2000 wp->hitattrs = attrs;
2001 if (!cpu->watchpoint_hit) {
2002 cpu->watchpoint_hit = wp;
2003 tb_check_watchpoint(cpu);
2004 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2005 cpu->exception_index = EXCP_DEBUG;
2006 cpu_loop_exit(cpu);
2007 } else {
2008 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2009 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2010 cpu_resume_from_signal(cpu, NULL);
2013 } else {
2014 wp->flags &= ~BP_WATCHPOINT_HIT;
2019 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2020 so these check for a hit then pass through to the normal out-of-line
2021 phys routines. */
2022 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2023 unsigned size, MemTxAttrs attrs)
2025 MemTxResult res;
2026 uint64_t data;
2028 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2029 switch (size) {
2030 case 1:
2031 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2032 break;
2033 case 2:
2034 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2035 break;
2036 case 4:
2037 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2038 break;
2039 default: abort();
2041 *pdata = data;
2042 return res;
2045 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2046 uint64_t val, unsigned size,
2047 MemTxAttrs attrs)
2049 MemTxResult res;
2051 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2052 switch (size) {
2053 case 1:
2054 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2055 break;
2056 case 2:
2057 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2058 break;
2059 case 4:
2060 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2061 break;
2062 default: abort();
2064 return res;
2067 static const MemoryRegionOps watch_mem_ops = {
2068 .read_with_attrs = watch_mem_read,
2069 .write_with_attrs = watch_mem_write,
2070 .endianness = DEVICE_NATIVE_ENDIAN,
2073 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2074 unsigned len, MemTxAttrs attrs)
2076 subpage_t *subpage = opaque;
2077 uint8_t buf[8];
2078 MemTxResult res;
2080 #if defined(DEBUG_SUBPAGE)
2081 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2082 subpage, len, addr);
2083 #endif
2084 res = address_space_read(subpage->as, addr + subpage->base,
2085 attrs, buf, len);
2086 if (res) {
2087 return res;
2089 switch (len) {
2090 case 1:
2091 *data = ldub_p(buf);
2092 return MEMTX_OK;
2093 case 2:
2094 *data = lduw_p(buf);
2095 return MEMTX_OK;
2096 case 4:
2097 *data = ldl_p(buf);
2098 return MEMTX_OK;
2099 case 8:
2100 *data = ldq_p(buf);
2101 return MEMTX_OK;
2102 default:
2103 abort();
2107 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2108 uint64_t value, unsigned len, MemTxAttrs attrs)
2110 subpage_t *subpage = opaque;
2111 uint8_t buf[8];
2113 #if defined(DEBUG_SUBPAGE)
2114 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2115 " value %"PRIx64"\n",
2116 __func__, subpage, len, addr, value);
2117 #endif
2118 switch (len) {
2119 case 1:
2120 stb_p(buf, value);
2121 break;
2122 case 2:
2123 stw_p(buf, value);
2124 break;
2125 case 4:
2126 stl_p(buf, value);
2127 break;
2128 case 8:
2129 stq_p(buf, value);
2130 break;
2131 default:
2132 abort();
2134 return address_space_write(subpage->as, addr + subpage->base,
2135 attrs, buf, len);
2138 static bool subpage_accepts(void *opaque, hwaddr addr,
2139 unsigned len, bool is_write)
2141 subpage_t *subpage = opaque;
2142 #if defined(DEBUG_SUBPAGE)
2143 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2144 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2145 #endif
2147 return address_space_access_valid(subpage->as, addr + subpage->base,
2148 len, is_write);
2151 static const MemoryRegionOps subpage_ops = {
2152 .read_with_attrs = subpage_read,
2153 .write_with_attrs = subpage_write,
2154 .impl.min_access_size = 1,
2155 .impl.max_access_size = 8,
2156 .valid.min_access_size = 1,
2157 .valid.max_access_size = 8,
2158 .valid.accepts = subpage_accepts,
2159 .endianness = DEVICE_NATIVE_ENDIAN,
2162 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2163 uint16_t section)
2165 int idx, eidx;
2167 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2168 return -1;
2169 idx = SUBPAGE_IDX(start);
2170 eidx = SUBPAGE_IDX(end);
2171 #if defined(DEBUG_SUBPAGE)
2172 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2173 __func__, mmio, start, end, idx, eidx, section);
2174 #endif
2175 for (; idx <= eidx; idx++) {
2176 mmio->sub_section[idx] = section;
2179 return 0;
2182 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2184 subpage_t *mmio;
2186 mmio = g_malloc0(sizeof(subpage_t));
2188 mmio->as = as;
2189 mmio->base = base;
2190 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2191 NULL, TARGET_PAGE_SIZE);
2192 mmio->iomem.subpage = true;
2193 #if defined(DEBUG_SUBPAGE)
2194 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2195 mmio, base, TARGET_PAGE_SIZE);
2196 #endif
2197 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2199 return mmio;
2202 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2203 MemoryRegion *mr)
2205 assert(as);
2206 MemoryRegionSection section = {
2207 .address_space = as,
2208 .mr = mr,
2209 .offset_within_address_space = 0,
2210 .offset_within_region = 0,
2211 .size = int128_2_64(),
2214 return phys_section_add(map, &section);
2217 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2219 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2220 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2221 MemoryRegionSection *sections = d->map.sections;
2223 return sections[index & ~TARGET_PAGE_MASK].mr;
2226 static void io_mem_init(void)
2228 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2229 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2230 NULL, UINT64_MAX);
2231 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2232 NULL, UINT64_MAX);
2233 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2234 NULL, UINT64_MAX);
2237 static void mem_begin(MemoryListener *listener)
2239 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2240 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2241 uint16_t n;
2243 n = dummy_section(&d->map, as, &io_mem_unassigned);
2244 assert(n == PHYS_SECTION_UNASSIGNED);
2245 n = dummy_section(&d->map, as, &io_mem_notdirty);
2246 assert(n == PHYS_SECTION_NOTDIRTY);
2247 n = dummy_section(&d->map, as, &io_mem_rom);
2248 assert(n == PHYS_SECTION_ROM);
2249 n = dummy_section(&d->map, as, &io_mem_watch);
2250 assert(n == PHYS_SECTION_WATCH);
2252 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2253 d->as = as;
2254 as->next_dispatch = d;
2257 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2259 phys_sections_free(&d->map);
2260 g_free(d);
2263 static void mem_commit(MemoryListener *listener)
2265 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2266 AddressSpaceDispatch *cur = as->dispatch;
2267 AddressSpaceDispatch *next = as->next_dispatch;
2269 phys_page_compact_all(next, next->map.nodes_nb);
2271 atomic_rcu_set(&as->dispatch, next);
2272 if (cur) {
2273 call_rcu(cur, address_space_dispatch_free, rcu);
2277 static void tcg_commit(MemoryListener *listener)
2279 CPUAddressSpace *cpuas;
2280 AddressSpaceDispatch *d;
2282 /* since each CPU stores ram addresses in its TLB cache, we must
2283 reset the modified entries */
2284 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2285 cpu_reloading_memory_map();
2286 /* The CPU and TLB are protected by the iothread lock.
2287 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2288 * may have split the RCU critical section.
2290 d = atomic_rcu_read(&cpuas->as->dispatch);
2291 cpuas->memory_dispatch = d;
2292 tlb_flush(cpuas->cpu, 1);
2295 void address_space_init_dispatch(AddressSpace *as)
2297 as->dispatch = NULL;
2298 as->dispatch_listener = (MemoryListener) {
2299 .begin = mem_begin,
2300 .commit = mem_commit,
2301 .region_add = mem_add,
2302 .region_nop = mem_add,
2303 .priority = 0,
2305 memory_listener_register(&as->dispatch_listener, as);
2308 void address_space_unregister(AddressSpace *as)
2310 memory_listener_unregister(&as->dispatch_listener);
2313 void address_space_destroy_dispatch(AddressSpace *as)
2315 AddressSpaceDispatch *d = as->dispatch;
2317 atomic_rcu_set(&as->dispatch, NULL);
2318 if (d) {
2319 call_rcu(d, address_space_dispatch_free, rcu);
2323 static void memory_map_init(void)
2325 system_memory = g_malloc(sizeof(*system_memory));
2327 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2328 address_space_init(&address_space_memory, system_memory, "memory");
2330 system_io = g_malloc(sizeof(*system_io));
2331 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2332 65536);
2333 address_space_init(&address_space_io, system_io, "I/O");
2336 MemoryRegion *get_system_memory(void)
2338 return system_memory;
2341 MemoryRegion *get_system_io(void)
2343 return system_io;
2346 #endif /* !defined(CONFIG_USER_ONLY) */
2348 /* physical memory access (slow version, mainly for debug) */
2349 #if defined(CONFIG_USER_ONLY)
2350 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2351 uint8_t *buf, int len, int is_write)
2353 int l, flags;
2354 target_ulong page;
2355 void * p;
2357 while (len > 0) {
2358 page = addr & TARGET_PAGE_MASK;
2359 l = (page + TARGET_PAGE_SIZE) - addr;
2360 if (l > len)
2361 l = len;
2362 flags = page_get_flags(page);
2363 if (!(flags & PAGE_VALID))
2364 return -1;
2365 if (is_write) {
2366 if (!(flags & PAGE_WRITE))
2367 return -1;
2368 /* XXX: this code should not depend on lock_user */
2369 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2370 return -1;
2371 memcpy(p, buf, l);
2372 unlock_user(p, addr, l);
2373 } else {
2374 if (!(flags & PAGE_READ))
2375 return -1;
2376 /* XXX: this code should not depend on lock_user */
2377 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2378 return -1;
2379 memcpy(buf, p, l);
2380 unlock_user(p, addr, 0);
2382 len -= l;
2383 buf += l;
2384 addr += l;
2386 return 0;
2389 #else
2391 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2392 hwaddr length)
2394 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2395 /* No early return if dirty_log_mask is or becomes 0, because
2396 * cpu_physical_memory_set_dirty_range will still call
2397 * xen_modified_memory.
2399 if (dirty_log_mask) {
2400 dirty_log_mask =
2401 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2403 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2404 tb_invalidate_phys_range(addr, addr + length);
2405 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2407 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2410 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2412 unsigned access_size_max = mr->ops->valid.max_access_size;
2414 /* Regions are assumed to support 1-4 byte accesses unless
2415 otherwise specified. */
2416 if (access_size_max == 0) {
2417 access_size_max = 4;
2420 /* Bound the maximum access by the alignment of the address. */
2421 if (!mr->ops->impl.unaligned) {
2422 unsigned align_size_max = addr & -addr;
2423 if (align_size_max != 0 && align_size_max < access_size_max) {
2424 access_size_max = align_size_max;
2428 /* Don't attempt accesses larger than the maximum. */
2429 if (l > access_size_max) {
2430 l = access_size_max;
2432 l = pow2floor(l);
2434 return l;
2437 static bool prepare_mmio_access(MemoryRegion *mr)
2439 bool unlocked = !qemu_mutex_iothread_locked();
2440 bool release_lock = false;
2442 if (unlocked && mr->global_locking) {
2443 qemu_mutex_lock_iothread();
2444 unlocked = false;
2445 release_lock = true;
2447 if (mr->flush_coalesced_mmio) {
2448 if (unlocked) {
2449 qemu_mutex_lock_iothread();
2451 qemu_flush_coalesced_mmio_buffer();
2452 if (unlocked) {
2453 qemu_mutex_unlock_iothread();
2457 return release_lock;
2460 /* Called within RCU critical section. */
2461 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2462 MemTxAttrs attrs,
2463 const uint8_t *buf,
2464 int len, hwaddr addr1,
2465 hwaddr l, MemoryRegion *mr)
2467 uint8_t *ptr;
2468 uint64_t val;
2469 MemTxResult result = MEMTX_OK;
2470 bool release_lock = false;
2472 for (;;) {
2473 if (!memory_access_is_direct(mr, true)) {
2474 release_lock |= prepare_mmio_access(mr);
2475 l = memory_access_size(mr, l, addr1);
2476 /* XXX: could force current_cpu to NULL to avoid
2477 potential bugs */
2478 switch (l) {
2479 case 8:
2480 /* 64 bit write access */
2481 val = ldq_p(buf);
2482 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2483 attrs);
2484 break;
2485 case 4:
2486 /* 32 bit write access */
2487 val = ldl_p(buf);
2488 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2489 attrs);
2490 break;
2491 case 2:
2492 /* 16 bit write access */
2493 val = lduw_p(buf);
2494 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2495 attrs);
2496 break;
2497 case 1:
2498 /* 8 bit write access */
2499 val = ldub_p(buf);
2500 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2501 attrs);
2502 break;
2503 default:
2504 abort();
2506 } else {
2507 addr1 += memory_region_get_ram_addr(mr);
2508 /* RAM case */
2509 ptr = qemu_get_ram_ptr(addr1);
2510 memcpy(ptr, buf, l);
2511 invalidate_and_set_dirty(mr, addr1, l);
2514 if (release_lock) {
2515 qemu_mutex_unlock_iothread();
2516 release_lock = false;
2519 len -= l;
2520 buf += l;
2521 addr += l;
2523 if (!len) {
2524 break;
2527 l = len;
2528 mr = address_space_translate(as, addr, &addr1, &l, true);
2531 return result;
2534 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2535 const uint8_t *buf, int len)
2537 hwaddr l;
2538 hwaddr addr1;
2539 MemoryRegion *mr;
2540 MemTxResult result = MEMTX_OK;
2542 if (len > 0) {
2543 rcu_read_lock();
2544 l = len;
2545 mr = address_space_translate(as, addr, &addr1, &l, true);
2546 result = address_space_write_continue(as, addr, attrs, buf, len,
2547 addr1, l, mr);
2548 rcu_read_unlock();
2551 return result;
2554 /* Called within RCU critical section. */
2555 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2556 MemTxAttrs attrs, uint8_t *buf,
2557 int len, hwaddr addr1, hwaddr l,
2558 MemoryRegion *mr)
2560 uint8_t *ptr;
2561 uint64_t val;
2562 MemTxResult result = MEMTX_OK;
2563 bool release_lock = false;
2565 for (;;) {
2566 if (!memory_access_is_direct(mr, false)) {
2567 /* I/O case */
2568 release_lock |= prepare_mmio_access(mr);
2569 l = memory_access_size(mr, l, addr1);
2570 switch (l) {
2571 case 8:
2572 /* 64 bit read access */
2573 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2574 attrs);
2575 stq_p(buf, val);
2576 break;
2577 case 4:
2578 /* 32 bit read access */
2579 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2580 attrs);
2581 stl_p(buf, val);
2582 break;
2583 case 2:
2584 /* 16 bit read access */
2585 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2586 attrs);
2587 stw_p(buf, val);
2588 break;
2589 case 1:
2590 /* 8 bit read access */
2591 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2592 attrs);
2593 stb_p(buf, val);
2594 break;
2595 default:
2596 abort();
2598 } else {
2599 /* RAM case */
2600 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2601 memcpy(buf, ptr, l);
2604 if (release_lock) {
2605 qemu_mutex_unlock_iothread();
2606 release_lock = false;
2609 len -= l;
2610 buf += l;
2611 addr += l;
2613 if (!len) {
2614 break;
2617 l = len;
2618 mr = address_space_translate(as, addr, &addr1, &l, false);
2621 return result;
2624 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2625 MemTxAttrs attrs, uint8_t *buf, int len)
2627 hwaddr l;
2628 hwaddr addr1;
2629 MemoryRegion *mr;
2630 MemTxResult result = MEMTX_OK;
2632 if (len > 0) {
2633 rcu_read_lock();
2634 l = len;
2635 mr = address_space_translate(as, addr, &addr1, &l, false);
2636 result = address_space_read_continue(as, addr, attrs, buf, len,
2637 addr1, l, mr);
2638 rcu_read_unlock();
2641 return result;
2644 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2645 uint8_t *buf, int len, bool is_write)
2647 if (is_write) {
2648 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2649 } else {
2650 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2654 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2655 int len, int is_write)
2657 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2658 buf, len, is_write);
2661 enum write_rom_type {
2662 WRITE_DATA,
2663 FLUSH_CACHE,
2666 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2667 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2669 hwaddr l;
2670 uint8_t *ptr;
2671 hwaddr addr1;
2672 MemoryRegion *mr;
2674 rcu_read_lock();
2675 while (len > 0) {
2676 l = len;
2677 mr = address_space_translate(as, addr, &addr1, &l, true);
2679 if (!(memory_region_is_ram(mr) ||
2680 memory_region_is_romd(mr))) {
2681 l = memory_access_size(mr, l, addr1);
2682 } else {
2683 addr1 += memory_region_get_ram_addr(mr);
2684 /* ROM/RAM case */
2685 ptr = qemu_get_ram_ptr(addr1);
2686 switch (type) {
2687 case WRITE_DATA:
2688 memcpy(ptr, buf, l);
2689 invalidate_and_set_dirty(mr, addr1, l);
2690 break;
2691 case FLUSH_CACHE:
2692 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2693 break;
2696 len -= l;
2697 buf += l;
2698 addr += l;
2700 rcu_read_unlock();
2703 /* used for ROM loading : can write in RAM and ROM */
2704 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2705 const uint8_t *buf, int len)
2707 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2710 void cpu_flush_icache_range(hwaddr start, int len)
2713 * This function should do the same thing as an icache flush that was
2714 * triggered from within the guest. For TCG we are always cache coherent,
2715 * so there is no need to flush anything. For KVM / Xen we need to flush
2716 * the host's instruction cache at least.
2718 if (tcg_enabled()) {
2719 return;
2722 cpu_physical_memory_write_rom_internal(&address_space_memory,
2723 start, NULL, len, FLUSH_CACHE);
2726 typedef struct {
2727 MemoryRegion *mr;
2728 void *buffer;
2729 hwaddr addr;
2730 hwaddr len;
2731 bool in_use;
2732 } BounceBuffer;
2734 static BounceBuffer bounce;
2736 typedef struct MapClient {
2737 QEMUBH *bh;
2738 QLIST_ENTRY(MapClient) link;
2739 } MapClient;
2741 QemuMutex map_client_list_lock;
2742 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2743 = QLIST_HEAD_INITIALIZER(map_client_list);
2745 static void cpu_unregister_map_client_do(MapClient *client)
2747 QLIST_REMOVE(client, link);
2748 g_free(client);
2751 static void cpu_notify_map_clients_locked(void)
2753 MapClient *client;
2755 while (!QLIST_EMPTY(&map_client_list)) {
2756 client = QLIST_FIRST(&map_client_list);
2757 qemu_bh_schedule(client->bh);
2758 cpu_unregister_map_client_do(client);
2762 void cpu_register_map_client(QEMUBH *bh)
2764 MapClient *client = g_malloc(sizeof(*client));
2766 qemu_mutex_lock(&map_client_list_lock);
2767 client->bh = bh;
2768 QLIST_INSERT_HEAD(&map_client_list, client, link);
2769 if (!atomic_read(&bounce.in_use)) {
2770 cpu_notify_map_clients_locked();
2772 qemu_mutex_unlock(&map_client_list_lock);
2775 void cpu_exec_init_all(void)
2777 qemu_mutex_init(&ram_list.mutex);
2778 io_mem_init();
2779 memory_map_init();
2780 qemu_mutex_init(&map_client_list_lock);
2783 void cpu_unregister_map_client(QEMUBH *bh)
2785 MapClient *client;
2787 qemu_mutex_lock(&map_client_list_lock);
2788 QLIST_FOREACH(client, &map_client_list, link) {
2789 if (client->bh == bh) {
2790 cpu_unregister_map_client_do(client);
2791 break;
2794 qemu_mutex_unlock(&map_client_list_lock);
2797 static void cpu_notify_map_clients(void)
2799 qemu_mutex_lock(&map_client_list_lock);
2800 cpu_notify_map_clients_locked();
2801 qemu_mutex_unlock(&map_client_list_lock);
2804 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2806 MemoryRegion *mr;
2807 hwaddr l, xlat;
2809 rcu_read_lock();
2810 while (len > 0) {
2811 l = len;
2812 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2813 if (!memory_access_is_direct(mr, is_write)) {
2814 l = memory_access_size(mr, l, addr);
2815 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2816 return false;
2820 len -= l;
2821 addr += l;
2823 rcu_read_unlock();
2824 return true;
2827 /* Map a physical memory region into a host virtual address.
2828 * May map a subset of the requested range, given by and returned in *plen.
2829 * May return NULL if resources needed to perform the mapping are exhausted.
2830 * Use only for reads OR writes - not for read-modify-write operations.
2831 * Use cpu_register_map_client() to know when retrying the map operation is
2832 * likely to succeed.
2834 void *address_space_map(AddressSpace *as,
2835 hwaddr addr,
2836 hwaddr *plen,
2837 bool is_write)
2839 hwaddr len = *plen;
2840 hwaddr done = 0;
2841 hwaddr l, xlat, base;
2842 MemoryRegion *mr, *this_mr;
2843 ram_addr_t raddr;
2844 void *ptr;
2846 if (len == 0) {
2847 return NULL;
2850 l = len;
2851 rcu_read_lock();
2852 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2854 if (!memory_access_is_direct(mr, is_write)) {
2855 if (atomic_xchg(&bounce.in_use, true)) {
2856 rcu_read_unlock();
2857 return NULL;
2859 /* Avoid unbounded allocations */
2860 l = MIN(l, TARGET_PAGE_SIZE);
2861 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2862 bounce.addr = addr;
2863 bounce.len = l;
2865 memory_region_ref(mr);
2866 bounce.mr = mr;
2867 if (!is_write) {
2868 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2869 bounce.buffer, l);
2872 rcu_read_unlock();
2873 *plen = l;
2874 return bounce.buffer;
2877 base = xlat;
2878 raddr = memory_region_get_ram_addr(mr);
2880 for (;;) {
2881 len -= l;
2882 addr += l;
2883 done += l;
2884 if (len == 0) {
2885 break;
2888 l = len;
2889 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2890 if (this_mr != mr || xlat != base + done) {
2891 break;
2895 memory_region_ref(mr);
2896 *plen = done;
2897 ptr = qemu_ram_ptr_length(raddr + base, plen);
2898 rcu_read_unlock();
2900 return ptr;
2903 /* Unmaps a memory region previously mapped by address_space_map().
2904 * Will also mark the memory as dirty if is_write == 1. access_len gives
2905 * the amount of memory that was actually read or written by the caller.
2907 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2908 int is_write, hwaddr access_len)
2910 if (buffer != bounce.buffer) {
2911 MemoryRegion *mr;
2912 ram_addr_t addr1;
2914 mr = qemu_ram_addr_from_host(buffer, &addr1);
2915 assert(mr != NULL);
2916 if (is_write) {
2917 invalidate_and_set_dirty(mr, addr1, access_len);
2919 if (xen_enabled()) {
2920 xen_invalidate_map_cache_entry(buffer);
2922 memory_region_unref(mr);
2923 return;
2925 if (is_write) {
2926 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2927 bounce.buffer, access_len);
2929 qemu_vfree(bounce.buffer);
2930 bounce.buffer = NULL;
2931 memory_region_unref(bounce.mr);
2932 atomic_mb_set(&bounce.in_use, false);
2933 cpu_notify_map_clients();
2936 void *cpu_physical_memory_map(hwaddr addr,
2937 hwaddr *plen,
2938 int is_write)
2940 return address_space_map(&address_space_memory, addr, plen, is_write);
2943 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2944 int is_write, hwaddr access_len)
2946 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2949 /* warning: addr must be aligned */
2950 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2951 MemTxAttrs attrs,
2952 MemTxResult *result,
2953 enum device_endian endian)
2955 uint8_t *ptr;
2956 uint64_t val;
2957 MemoryRegion *mr;
2958 hwaddr l = 4;
2959 hwaddr addr1;
2960 MemTxResult r;
2961 bool release_lock = false;
2963 rcu_read_lock();
2964 mr = address_space_translate(as, addr, &addr1, &l, false);
2965 if (l < 4 || !memory_access_is_direct(mr, false)) {
2966 release_lock |= prepare_mmio_access(mr);
2968 /* I/O case */
2969 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2970 #if defined(TARGET_WORDS_BIGENDIAN)
2971 if (endian == DEVICE_LITTLE_ENDIAN) {
2972 val = bswap32(val);
2974 #else
2975 if (endian == DEVICE_BIG_ENDIAN) {
2976 val = bswap32(val);
2978 #endif
2979 } else {
2980 /* RAM case */
2981 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2982 & TARGET_PAGE_MASK)
2983 + addr1);
2984 switch (endian) {
2985 case DEVICE_LITTLE_ENDIAN:
2986 val = ldl_le_p(ptr);
2987 break;
2988 case DEVICE_BIG_ENDIAN:
2989 val = ldl_be_p(ptr);
2990 break;
2991 default:
2992 val = ldl_p(ptr);
2993 break;
2995 r = MEMTX_OK;
2997 if (result) {
2998 *result = r;
3000 if (release_lock) {
3001 qemu_mutex_unlock_iothread();
3003 rcu_read_unlock();
3004 return val;
3007 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3008 MemTxAttrs attrs, MemTxResult *result)
3010 return address_space_ldl_internal(as, addr, attrs, result,
3011 DEVICE_NATIVE_ENDIAN);
3014 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3015 MemTxAttrs attrs, MemTxResult *result)
3017 return address_space_ldl_internal(as, addr, attrs, result,
3018 DEVICE_LITTLE_ENDIAN);
3021 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3022 MemTxAttrs attrs, MemTxResult *result)
3024 return address_space_ldl_internal(as, addr, attrs, result,
3025 DEVICE_BIG_ENDIAN);
3028 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3030 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3033 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3035 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3038 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3040 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3043 /* warning: addr must be aligned */
3044 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3045 MemTxAttrs attrs,
3046 MemTxResult *result,
3047 enum device_endian endian)
3049 uint8_t *ptr;
3050 uint64_t val;
3051 MemoryRegion *mr;
3052 hwaddr l = 8;
3053 hwaddr addr1;
3054 MemTxResult r;
3055 bool release_lock = false;
3057 rcu_read_lock();
3058 mr = address_space_translate(as, addr, &addr1, &l,
3059 false);
3060 if (l < 8 || !memory_access_is_direct(mr, false)) {
3061 release_lock |= prepare_mmio_access(mr);
3063 /* I/O case */
3064 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3065 #if defined(TARGET_WORDS_BIGENDIAN)
3066 if (endian == DEVICE_LITTLE_ENDIAN) {
3067 val = bswap64(val);
3069 #else
3070 if (endian == DEVICE_BIG_ENDIAN) {
3071 val = bswap64(val);
3073 #endif
3074 } else {
3075 /* RAM case */
3076 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3077 & TARGET_PAGE_MASK)
3078 + addr1);
3079 switch (endian) {
3080 case DEVICE_LITTLE_ENDIAN:
3081 val = ldq_le_p(ptr);
3082 break;
3083 case DEVICE_BIG_ENDIAN:
3084 val = ldq_be_p(ptr);
3085 break;
3086 default:
3087 val = ldq_p(ptr);
3088 break;
3090 r = MEMTX_OK;
3092 if (result) {
3093 *result = r;
3095 if (release_lock) {
3096 qemu_mutex_unlock_iothread();
3098 rcu_read_unlock();
3099 return val;
3102 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3103 MemTxAttrs attrs, MemTxResult *result)
3105 return address_space_ldq_internal(as, addr, attrs, result,
3106 DEVICE_NATIVE_ENDIAN);
3109 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3110 MemTxAttrs attrs, MemTxResult *result)
3112 return address_space_ldq_internal(as, addr, attrs, result,
3113 DEVICE_LITTLE_ENDIAN);
3116 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3117 MemTxAttrs attrs, MemTxResult *result)
3119 return address_space_ldq_internal(as, addr, attrs, result,
3120 DEVICE_BIG_ENDIAN);
3123 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3125 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3128 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3130 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3133 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3135 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3138 /* XXX: optimize */
3139 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3140 MemTxAttrs attrs, MemTxResult *result)
3142 uint8_t val;
3143 MemTxResult r;
3145 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3146 if (result) {
3147 *result = r;
3149 return val;
3152 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3154 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3157 /* warning: addr must be aligned */
3158 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3159 hwaddr addr,
3160 MemTxAttrs attrs,
3161 MemTxResult *result,
3162 enum device_endian endian)
3164 uint8_t *ptr;
3165 uint64_t val;
3166 MemoryRegion *mr;
3167 hwaddr l = 2;
3168 hwaddr addr1;
3169 MemTxResult r;
3170 bool release_lock = false;
3172 rcu_read_lock();
3173 mr = address_space_translate(as, addr, &addr1, &l,
3174 false);
3175 if (l < 2 || !memory_access_is_direct(mr, false)) {
3176 release_lock |= prepare_mmio_access(mr);
3178 /* I/O case */
3179 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3180 #if defined(TARGET_WORDS_BIGENDIAN)
3181 if (endian == DEVICE_LITTLE_ENDIAN) {
3182 val = bswap16(val);
3184 #else
3185 if (endian == DEVICE_BIG_ENDIAN) {
3186 val = bswap16(val);
3188 #endif
3189 } else {
3190 /* RAM case */
3191 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3192 & TARGET_PAGE_MASK)
3193 + addr1);
3194 switch (endian) {
3195 case DEVICE_LITTLE_ENDIAN:
3196 val = lduw_le_p(ptr);
3197 break;
3198 case DEVICE_BIG_ENDIAN:
3199 val = lduw_be_p(ptr);
3200 break;
3201 default:
3202 val = lduw_p(ptr);
3203 break;
3205 r = MEMTX_OK;
3207 if (result) {
3208 *result = r;
3210 if (release_lock) {
3211 qemu_mutex_unlock_iothread();
3213 rcu_read_unlock();
3214 return val;
3217 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3218 MemTxAttrs attrs, MemTxResult *result)
3220 return address_space_lduw_internal(as, addr, attrs, result,
3221 DEVICE_NATIVE_ENDIAN);
3224 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3225 MemTxAttrs attrs, MemTxResult *result)
3227 return address_space_lduw_internal(as, addr, attrs, result,
3228 DEVICE_LITTLE_ENDIAN);
3231 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3232 MemTxAttrs attrs, MemTxResult *result)
3234 return address_space_lduw_internal(as, addr, attrs, result,
3235 DEVICE_BIG_ENDIAN);
3238 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3240 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3243 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3245 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3248 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3250 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3253 /* warning: addr must be aligned. The ram page is not masked as dirty
3254 and the code inside is not invalidated. It is useful if the dirty
3255 bits are used to track modified PTEs */
3256 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3257 MemTxAttrs attrs, MemTxResult *result)
3259 uint8_t *ptr;
3260 MemoryRegion *mr;
3261 hwaddr l = 4;
3262 hwaddr addr1;
3263 MemTxResult r;
3264 uint8_t dirty_log_mask;
3265 bool release_lock = false;
3267 rcu_read_lock();
3268 mr = address_space_translate(as, addr, &addr1, &l,
3269 true);
3270 if (l < 4 || !memory_access_is_direct(mr, true)) {
3271 release_lock |= prepare_mmio_access(mr);
3273 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3274 } else {
3275 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3276 ptr = qemu_get_ram_ptr(addr1);
3277 stl_p(ptr, val);
3279 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3280 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3281 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3282 r = MEMTX_OK;
3284 if (result) {
3285 *result = r;
3287 if (release_lock) {
3288 qemu_mutex_unlock_iothread();
3290 rcu_read_unlock();
3293 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3295 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3298 /* warning: addr must be aligned */
3299 static inline void address_space_stl_internal(AddressSpace *as,
3300 hwaddr addr, uint32_t val,
3301 MemTxAttrs attrs,
3302 MemTxResult *result,
3303 enum device_endian endian)
3305 uint8_t *ptr;
3306 MemoryRegion *mr;
3307 hwaddr l = 4;
3308 hwaddr addr1;
3309 MemTxResult r;
3310 bool release_lock = false;
3312 rcu_read_lock();
3313 mr = address_space_translate(as, addr, &addr1, &l,
3314 true);
3315 if (l < 4 || !memory_access_is_direct(mr, true)) {
3316 release_lock |= prepare_mmio_access(mr);
3318 #if defined(TARGET_WORDS_BIGENDIAN)
3319 if (endian == DEVICE_LITTLE_ENDIAN) {
3320 val = bswap32(val);
3322 #else
3323 if (endian == DEVICE_BIG_ENDIAN) {
3324 val = bswap32(val);
3326 #endif
3327 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3328 } else {
3329 /* RAM case */
3330 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3331 ptr = qemu_get_ram_ptr(addr1);
3332 switch (endian) {
3333 case DEVICE_LITTLE_ENDIAN:
3334 stl_le_p(ptr, val);
3335 break;
3336 case DEVICE_BIG_ENDIAN:
3337 stl_be_p(ptr, val);
3338 break;
3339 default:
3340 stl_p(ptr, val);
3341 break;
3343 invalidate_and_set_dirty(mr, addr1, 4);
3344 r = MEMTX_OK;
3346 if (result) {
3347 *result = r;
3349 if (release_lock) {
3350 qemu_mutex_unlock_iothread();
3352 rcu_read_unlock();
3355 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3356 MemTxAttrs attrs, MemTxResult *result)
3358 address_space_stl_internal(as, addr, val, attrs, result,
3359 DEVICE_NATIVE_ENDIAN);
3362 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3363 MemTxAttrs attrs, MemTxResult *result)
3365 address_space_stl_internal(as, addr, val, attrs, result,
3366 DEVICE_LITTLE_ENDIAN);
3369 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3370 MemTxAttrs attrs, MemTxResult *result)
3372 address_space_stl_internal(as, addr, val, attrs, result,
3373 DEVICE_BIG_ENDIAN);
3376 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3378 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3381 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3383 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3386 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3388 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3391 /* XXX: optimize */
3392 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3393 MemTxAttrs attrs, MemTxResult *result)
3395 uint8_t v = val;
3396 MemTxResult r;
3398 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3399 if (result) {
3400 *result = r;
3404 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3406 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3409 /* warning: addr must be aligned */
3410 static inline void address_space_stw_internal(AddressSpace *as,
3411 hwaddr addr, uint32_t val,
3412 MemTxAttrs attrs,
3413 MemTxResult *result,
3414 enum device_endian endian)
3416 uint8_t *ptr;
3417 MemoryRegion *mr;
3418 hwaddr l = 2;
3419 hwaddr addr1;
3420 MemTxResult r;
3421 bool release_lock = false;
3423 rcu_read_lock();
3424 mr = address_space_translate(as, addr, &addr1, &l, true);
3425 if (l < 2 || !memory_access_is_direct(mr, true)) {
3426 release_lock |= prepare_mmio_access(mr);
3428 #if defined(TARGET_WORDS_BIGENDIAN)
3429 if (endian == DEVICE_LITTLE_ENDIAN) {
3430 val = bswap16(val);
3432 #else
3433 if (endian == DEVICE_BIG_ENDIAN) {
3434 val = bswap16(val);
3436 #endif
3437 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3438 } else {
3439 /* RAM case */
3440 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3441 ptr = qemu_get_ram_ptr(addr1);
3442 switch (endian) {
3443 case DEVICE_LITTLE_ENDIAN:
3444 stw_le_p(ptr, val);
3445 break;
3446 case DEVICE_BIG_ENDIAN:
3447 stw_be_p(ptr, val);
3448 break;
3449 default:
3450 stw_p(ptr, val);
3451 break;
3453 invalidate_and_set_dirty(mr, addr1, 2);
3454 r = MEMTX_OK;
3456 if (result) {
3457 *result = r;
3459 if (release_lock) {
3460 qemu_mutex_unlock_iothread();
3462 rcu_read_unlock();
3465 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3466 MemTxAttrs attrs, MemTxResult *result)
3468 address_space_stw_internal(as, addr, val, attrs, result,
3469 DEVICE_NATIVE_ENDIAN);
3472 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3473 MemTxAttrs attrs, MemTxResult *result)
3475 address_space_stw_internal(as, addr, val, attrs, result,
3476 DEVICE_LITTLE_ENDIAN);
3479 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3480 MemTxAttrs attrs, MemTxResult *result)
3482 address_space_stw_internal(as, addr, val, attrs, result,
3483 DEVICE_BIG_ENDIAN);
3486 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3488 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3491 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3493 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3496 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3498 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3501 /* XXX: optimize */
3502 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3503 MemTxAttrs attrs, MemTxResult *result)
3505 MemTxResult r;
3506 val = tswap64(val);
3507 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3508 if (result) {
3509 *result = r;
3513 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3514 MemTxAttrs attrs, MemTxResult *result)
3516 MemTxResult r;
3517 val = cpu_to_le64(val);
3518 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3519 if (result) {
3520 *result = r;
3523 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3524 MemTxAttrs attrs, MemTxResult *result)
3526 MemTxResult r;
3527 val = cpu_to_be64(val);
3528 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3529 if (result) {
3530 *result = r;
3534 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3536 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3539 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3541 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3544 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3546 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3549 /* virtual memory access for debug (includes writing to ROM) */
3550 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3551 uint8_t *buf, int len, int is_write)
3553 int l;
3554 hwaddr phys_addr;
3555 target_ulong page;
3557 while (len > 0) {
3558 page = addr & TARGET_PAGE_MASK;
3559 phys_addr = cpu_get_phys_page_debug(cpu, page);
3560 /* if no physical page mapped, return an error */
3561 if (phys_addr == -1)
3562 return -1;
3563 l = (page + TARGET_PAGE_SIZE) - addr;
3564 if (l > len)
3565 l = len;
3566 phys_addr += (addr & ~TARGET_PAGE_MASK);
3567 if (is_write) {
3568 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3569 } else {
3570 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3571 buf, l, 0);
3573 len -= l;
3574 buf += l;
3575 addr += l;
3577 return 0;
3581 * Allows code that needs to deal with migration bitmaps etc to still be built
3582 * target independent.
3584 size_t qemu_target_page_bits(void)
3586 return TARGET_PAGE_BITS;
3589 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true iff TARGET_WORDS_BIGENDIAN is defined at build time.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3605 #ifndef CONFIG_USER_ONLY
3606 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3608 MemoryRegion*mr;
3609 hwaddr l = 1;
3610 bool res;
3612 rcu_read_lock();
3613 mr = address_space_translate(&address_space_memory,
3614 phys_addr, &phys_addr, &l, false);
3616 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3617 rcu_read_unlock();
3618 return res;
3621 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3623 RAMBlock *block;
3624 int ret = 0;
3626 rcu_read_lock();
3627 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3628 ret = func(block->idstr, block->host, block->offset,
3629 block->used_length, opaque);
3630 if (ret) {
3631 break;
3634 rcu_read_unlock();
3635 return ret;
3637 #endif