libqos: allow zero-size allocations
[qemu/kevin.git] / exec.c
blob7f0ce42af0c70772d784f0976d3452b1cb7689fe
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
58 #include "qemu/range.h"
59 #ifndef _WIN32
60 #include "qemu/mmap-alloc.h"
61 #endif
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
80 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
81 #define RAM_PREALLOC (1 << 0)
83 /* RAM is mmap-ed with MAP_SHARED */
84 #define RAM_SHARED (1 << 1)
/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
89 #define RAM_RESIZEABLE (1 << 2)
91 #endif
93 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
94 /* current CPU in the current thread. It is only valid inside
95 cpu_exec() */
96 __thread CPUState *current_cpu;
97 /* 0 = Do not count executed instructions.
98 1 = Precise instruction counting.
99 2 = Adaptive rate instruction counting. */
100 int use_icount;
102 #if !defined(CONFIG_USER_ONLY)
/*
 * One slot of the multi-level physical page map.  Both fields share a
 * single 32-bit storage unit.
 */
typedef struct PhysPageEntry {
    /* Number of levels to skip to reach the next table (in units of
     * L2_SIZE); zero marks a leaf entry.
     */
    uint32_t skip : 6;
    /* For a leaf (!skip): index into phys_sections.
     * Otherwise (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
} PhysPageEntry;
113 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
115 /* Size of the L2 (and L3, etc) page tables. */
116 #define ADDR_SPACE_BITS 64
118 #define P_L2_BITS 9
119 #define P_L2_SIZE (1 << P_L2_BITS)
121 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
123 typedef PhysPageEntry Node[P_L2_SIZE];
125 typedef struct PhysPageMap {
126 struct rcu_head rcu;
128 unsigned sections_nb;
129 unsigned sections_nb_alloc;
130 unsigned nodes_nb;
131 unsigned nodes_nb_alloc;
132 Node *nodes;
133 MemoryRegionSection *sections;
134 } PhysPageMap;
136 struct AddressSpaceDispatch {
137 struct rcu_head rcu;
139 /* This is a multi-level map on the physical address space.
140 * The bottom level has pointers to MemoryRegionSections.
142 PhysPageEntry phys_map;
143 PhysPageMap map;
144 AddressSpace *as;
147 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
148 typedef struct subpage_t {
149 MemoryRegion iomem;
150 AddressSpace *as;
151 hwaddr base;
152 uint16_t sub_section[TARGET_PAGE_SIZE];
153 } subpage_t;
155 #define PHYS_SECTION_UNASSIGNED 0
156 #define PHYS_SECTION_NOTDIRTY 1
157 #define PHYS_SECTION_ROM 2
158 #define PHYS_SECTION_WATCH 3
160 static void io_mem_init(void);
161 static void memory_map_init(void);
162 static void tcg_commit(MemoryListener *listener);
164 static MemoryRegion io_mem_watch;
167 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
168 * @cpu: the CPU whose AddressSpace this is
169 * @as: the AddressSpace itself
170 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
171 * @tcg_as_listener: listener for tracking changes to the AddressSpace
173 struct CPUAddressSpace {
174 CPUState *cpu;
175 AddressSpace *as;
176 struct AddressSpaceDispatch *memory_dispatch;
177 MemoryListener tcg_as_listener;
180 #endif
182 #if !defined(CONFIG_USER_ONLY)
184 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
186 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
188 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
189 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
193 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
195 unsigned i;
196 uint32_t ret;
197 PhysPageEntry e;
198 PhysPageEntry *p;
200 ret = map->nodes_nb++;
201 p = map->nodes[ret];
202 assert(ret != PHYS_MAP_NODE_NIL);
203 assert(ret != map->nodes_nb_alloc);
205 e.skip = leaf ? 0 : 1;
206 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
207 for (i = 0; i < P_L2_SIZE; ++i) {
208 memcpy(&p[i], &e, sizeof(e));
210 return ret;
213 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
214 hwaddr *index, hwaddr *nb, uint16_t leaf,
215 int level)
217 PhysPageEntry *p;
218 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
220 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
221 lp->ptr = phys_map_node_alloc(map, level == 0);
223 p = map->nodes[lp->ptr];
224 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
226 while (*nb && lp < &p[P_L2_SIZE]) {
227 if ((*index & (step - 1)) == 0 && *nb >= step) {
228 lp->skip = 0;
229 lp->ptr = leaf;
230 *index += step;
231 *nb -= step;
232 } else {
233 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
235 ++lp;
239 static void phys_page_set(AddressSpaceDispatch *d,
240 hwaddr index, hwaddr nb,
241 uint16_t leaf)
243 /* Wildly overreserve - it doesn't matter much. */
244 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
246 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
249 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
250 * and update our entry so we can skip it and go directly to the destination.
252 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
254 unsigned valid_ptr = P_L2_SIZE;
255 int valid = 0;
256 PhysPageEntry *p;
257 int i;
259 if (lp->ptr == PHYS_MAP_NODE_NIL) {
260 return;
263 p = nodes[lp->ptr];
264 for (i = 0; i < P_L2_SIZE; i++) {
265 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
266 continue;
269 valid_ptr = i;
270 valid++;
271 if (p[i].skip) {
272 phys_page_compact(&p[i], nodes, compacted);
276 /* We can only compress if there's only one child. */
277 if (valid != 1) {
278 return;
281 assert(valid_ptr < P_L2_SIZE);
283 /* Don't compress if it won't fit in the # of bits we have. */
284 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
285 return;
288 lp->ptr = p[valid_ptr].ptr;
289 if (!p[valid_ptr].skip) {
290 /* If our only child is a leaf, make this a leaf. */
291 /* By design, we should have made this node a leaf to begin with so we
292 * should never reach here.
293 * But since it's so simple to handle this, let's do it just in case we
294 * change this rule.
296 lp->skip = 0;
297 } else {
298 lp->skip += p[valid_ptr].skip;
302 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
304 DECLARE_BITMAP(compacted, nodes_nb);
306 if (d->phys_map.skip) {
307 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
311 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
312 Node *nodes, MemoryRegionSection *sections)
314 PhysPageEntry *p;
315 hwaddr index = addr >> TARGET_PAGE_BITS;
316 int i;
318 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
319 if (lp.ptr == PHYS_MAP_NODE_NIL) {
320 return &sections[PHYS_SECTION_UNASSIGNED];
322 p = nodes[lp.ptr];
323 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
326 if (sections[lp.ptr].size.hi ||
327 range_covers_byte(sections[lp.ptr].offset_within_address_space,
328 sections[lp.ptr].size.lo, addr)) {
329 return &sections[lp.ptr];
330 } else {
331 return &sections[PHYS_SECTION_UNASSIGNED];
335 bool memory_region_is_unassigned(MemoryRegion *mr)
337 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
338 && mr != &io_mem_watch;
341 /* Called from RCU critical section */
342 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
343 hwaddr addr,
344 bool resolve_subpage)
346 MemoryRegionSection *section;
347 subpage_t *subpage;
349 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
350 if (resolve_subpage && section->mr->subpage) {
351 subpage = container_of(section->mr, subpage_t, iomem);
352 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
354 return section;
357 /* Called from RCU critical section */
358 static MemoryRegionSection *
359 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
360 hwaddr *plen, bool resolve_subpage)
362 MemoryRegionSection *section;
363 MemoryRegion *mr;
364 Int128 diff;
366 section = address_space_lookup_region(d, addr, resolve_subpage);
367 /* Compute offset within MemoryRegionSection */
368 addr -= section->offset_within_address_space;
370 /* Compute offset within MemoryRegion */
371 *xlat = addr + section->offset_within_region;
373 mr = section->mr;
375 /* MMIO registers can be expected to perform full-width accesses based only
376 * on their address, without considering adjacent registers that could
377 * decode to completely different MemoryRegions. When such registers
378 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
379 * regions overlap wildly. For this reason we cannot clamp the accesses
380 * here.
382 * If the length is small (as is the case for address_space_ldl/stl),
383 * everything works fine. If the incoming length is large, however,
384 * the caller really has to do the clamping through memory_access_size.
386 if (memory_region_is_ram(mr)) {
387 diff = int128_sub(section->size, int128_make64(addr));
388 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
390 return section;
393 /* Called from RCU critical section */
394 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
395 hwaddr *xlat, hwaddr *plen,
396 bool is_write)
398 IOMMUTLBEntry iotlb;
399 MemoryRegionSection *section;
400 MemoryRegion *mr;
402 for (;;) {
403 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
404 section = address_space_translate_internal(d, addr, &addr, plen, true);
405 mr = section->mr;
407 if (!mr->iommu_ops) {
408 break;
411 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
412 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
413 | (addr & iotlb.addr_mask));
414 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
415 if (!(iotlb.perm & (1 << is_write))) {
416 mr = &io_mem_unassigned;
417 break;
420 as = iotlb.target_as;
423 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
424 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
425 *plen = MIN(page, *plen);
428 *xlat = addr;
429 return mr;
432 /* Called from RCU critical section */
433 MemoryRegionSection *
434 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
435 hwaddr *xlat, hwaddr *plen)
437 MemoryRegionSection *section;
438 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
439 addr, xlat, plen, false);
441 assert(!section->mr->iommu_ops);
442 return section;
444 #endif
446 #if !defined(CONFIG_USER_ONLY)
448 static int cpu_common_post_load(void *opaque, int version_id)
450 CPUState *cpu = opaque;
452 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
453 version_id is increased. */
454 cpu->interrupt_request &= ~0x01;
455 tlb_flush(cpu, 1);
457 return 0;
460 static int cpu_common_pre_load(void *opaque)
462 CPUState *cpu = opaque;
464 cpu->exception_index = -1;
466 return 0;
469 static bool cpu_common_exception_index_needed(void *opaque)
471 CPUState *cpu = opaque;
473 return tcg_enabled() && cpu->exception_index != -1;
476 static const VMStateDescription vmstate_cpu_common_exception_index = {
477 .name = "cpu_common/exception_index",
478 .version_id = 1,
479 .minimum_version_id = 1,
480 .needed = cpu_common_exception_index_needed,
481 .fields = (VMStateField[]) {
482 VMSTATE_INT32(exception_index, CPUState),
483 VMSTATE_END_OF_LIST()
487 static bool cpu_common_crash_occurred_needed(void *opaque)
489 CPUState *cpu = opaque;
491 return cpu->crash_occurred;
494 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
495 .name = "cpu_common/crash_occurred",
496 .version_id = 1,
497 .minimum_version_id = 1,
498 .needed = cpu_common_crash_occurred_needed,
499 .fields = (VMStateField[]) {
500 VMSTATE_BOOL(crash_occurred, CPUState),
501 VMSTATE_END_OF_LIST()
505 const VMStateDescription vmstate_cpu_common = {
506 .name = "cpu_common",
507 .version_id = 1,
508 .minimum_version_id = 1,
509 .pre_load = cpu_common_pre_load,
510 .post_load = cpu_common_post_load,
511 .fields = (VMStateField[]) {
512 VMSTATE_UINT32(halted, CPUState),
513 VMSTATE_UINT32(interrupt_request, CPUState),
514 VMSTATE_END_OF_LIST()
516 .subsections = (const VMStateDescription*[]) {
517 &vmstate_cpu_common_exception_index,
518 &vmstate_cpu_common_crash_occurred,
519 NULL
523 #endif
525 CPUState *qemu_get_cpu(int index)
527 CPUState *cpu;
529 CPU_FOREACH(cpu) {
530 if (cpu->cpu_index == index) {
531 return cpu;
535 return NULL;
538 #if !defined(CONFIG_USER_ONLY)
539 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
541 /* We only support one address space per cpu at the moment. */
542 assert(cpu->as == as);
544 if (cpu->cpu_ases) {
545 /* We've already registered the listener for our only AS */
546 return;
549 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
550 cpu->cpu_ases[0].cpu = cpu;
551 cpu->cpu_ases[0].as = as;
552 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
553 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
555 #endif
557 #ifndef CONFIG_USER_ONLY
558 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
560 static int cpu_get_free_index(Error **errp)
562 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
564 if (cpu >= MAX_CPUMASK_BITS) {
565 error_setg(errp, "Trying to use more CPUs than max of %d",
566 MAX_CPUMASK_BITS);
567 return -1;
570 bitmap_set(cpu_index_map, cpu, 1);
571 return cpu;
574 void cpu_exec_exit(CPUState *cpu)
576 if (cpu->cpu_index == -1) {
577 /* cpu_index was never allocated by this @cpu or was already freed. */
578 return;
581 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
582 cpu->cpu_index = -1;
584 #else
586 static int cpu_get_free_index(Error **errp)
588 CPUState *some_cpu;
589 int cpu_index = 0;
591 CPU_FOREACH(some_cpu) {
592 cpu_index++;
594 return cpu_index;
597 void cpu_exec_exit(CPUState *cpu)
600 #endif
602 void cpu_exec_init(CPUState *cpu, Error **errp)
604 CPUClass *cc = CPU_GET_CLASS(cpu);
605 int cpu_index;
606 Error *local_err = NULL;
608 #ifndef CONFIG_USER_ONLY
609 cpu->as = &address_space_memory;
610 cpu->thread_id = qemu_get_thread_id();
611 #endif
613 #if defined(CONFIG_USER_ONLY)
614 cpu_list_lock();
615 #endif
616 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
617 if (local_err) {
618 error_propagate(errp, local_err);
619 #if defined(CONFIG_USER_ONLY)
620 cpu_list_unlock();
621 #endif
622 return;
624 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
625 #if defined(CONFIG_USER_ONLY)
626 cpu_list_unlock();
627 #endif
628 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
629 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
631 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
632 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
633 cpu_save, cpu_load, cpu->env_ptr);
634 assert(cc->vmsd == NULL);
635 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
636 #endif
637 if (cc->vmsd != NULL) {
638 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
642 #if defined(CONFIG_USER_ONLY)
643 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
645 tb_invalidate_phys_page_range(pc, pc + 1, 0);
647 #else
648 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
650 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
651 if (phys != -1) {
652 tb_invalidate_phys_addr(cpu->as,
653 phys | (pc & ~TARGET_PAGE_MASK));
656 #endif
658 #if defined(CONFIG_USER_ONLY)
659 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
664 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
665 int flags)
667 return -ENOSYS;
670 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
674 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
675 int flags, CPUWatchpoint **watchpoint)
677 return -ENOSYS;
679 #else
680 /* Add a watchpoint. */
681 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
682 int flags, CPUWatchpoint **watchpoint)
684 CPUWatchpoint *wp;
686 /* forbid ranges which are empty or run off the end of the address space */
687 if (len == 0 || (addr + len - 1) < addr) {
688 error_report("tried to set invalid watchpoint at %"
689 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
690 return -EINVAL;
692 wp = g_malloc(sizeof(*wp));
694 wp->vaddr = addr;
695 wp->len = len;
696 wp->flags = flags;
698 /* keep all GDB-injected watchpoints in front */
699 if (flags & BP_GDB) {
700 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
701 } else {
702 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
705 tlb_flush_page(cpu, addr);
707 if (watchpoint)
708 *watchpoint = wp;
709 return 0;
712 /* Remove a specific watchpoint. */
713 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
714 int flags)
716 CPUWatchpoint *wp;
718 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
719 if (addr == wp->vaddr && len == wp->len
720 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
721 cpu_watchpoint_remove_by_ref(cpu, wp);
722 return 0;
725 return -ENOENT;
728 /* Remove a specific watchpoint by reference. */
729 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
731 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
733 tlb_flush_page(cpu, watchpoint->vaddr);
735 g_free(watchpoint);
738 /* Remove all matching watchpoints. */
739 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
741 CPUWatchpoint *wp, *next;
743 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
744 if (wp->flags & mask) {
745 cpu_watchpoint_remove_by_ref(cpu, wp);
750 /* Return true if this watchpoint address matches the specified
751 * access (ie the address range covered by the watchpoint overlaps
752 * partially or completely with the address range covered by the
753 * access).
755 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
756 vaddr addr,
757 vaddr len)
759 /* We know the lengths are non-zero, but a little caution is
760 * required to avoid errors in the case where the range ends
761 * exactly at the top of the address space and so addr + len
762 * wraps round to zero.
764 vaddr wpend = wp->vaddr + wp->len - 1;
765 vaddr addrend = addr + len - 1;
767 return !(addr > wpend || wp->vaddr > addrend);
770 #endif
772 /* Add a breakpoint. */
773 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
774 CPUBreakpoint **breakpoint)
776 CPUBreakpoint *bp;
778 bp = g_malloc(sizeof(*bp));
780 bp->pc = pc;
781 bp->flags = flags;
783 /* keep all GDB-injected breakpoints in front */
784 if (flags & BP_GDB) {
785 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
786 } else {
787 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
790 breakpoint_invalidate(cpu, pc);
792 if (breakpoint) {
793 *breakpoint = bp;
795 return 0;
798 /* Remove a specific breakpoint. */
799 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
801 CPUBreakpoint *bp;
803 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
804 if (bp->pc == pc && bp->flags == flags) {
805 cpu_breakpoint_remove_by_ref(cpu, bp);
806 return 0;
809 return -ENOENT;
812 /* Remove a specific breakpoint by reference. */
813 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
815 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
817 breakpoint_invalidate(cpu, breakpoint->pc);
819 g_free(breakpoint);
822 /* Remove all matching breakpoints. */
823 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
825 CPUBreakpoint *bp, *next;
827 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
828 if (bp->flags & mask) {
829 cpu_breakpoint_remove_by_ref(cpu, bp);
834 /* enable or disable single step mode. EXCP_DEBUG is returned by the
835 CPU loop after each instruction */
836 void cpu_single_step(CPUState *cpu, int enabled)
838 if (cpu->singlestep_enabled != enabled) {
839 cpu->singlestep_enabled = enabled;
840 if (kvm_enabled()) {
841 kvm_update_guest_debug(cpu, 0);
842 } else {
843 /* must flush all the translated code to avoid inconsistencies */
844 /* XXX: only flush what is necessary */
845 tb_flush(cpu);
850 void cpu_abort(CPUState *cpu, const char *fmt, ...)
852 va_list ap;
853 va_list ap2;
855 va_start(ap, fmt);
856 va_copy(ap2, ap);
857 fprintf(stderr, "qemu: fatal: ");
858 vfprintf(stderr, fmt, ap);
859 fprintf(stderr, "\n");
860 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
861 if (qemu_log_separate()) {
862 qemu_log("qemu: fatal: ");
863 qemu_log_vprintf(fmt, ap2);
864 qemu_log("\n");
865 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
866 qemu_log_flush();
867 qemu_log_close();
869 va_end(ap2);
870 va_end(ap);
871 replay_finish();
872 #if defined(CONFIG_USER_ONLY)
874 struct sigaction act;
875 sigfillset(&act.sa_mask);
876 act.sa_handler = SIG_DFL;
877 sigaction(SIGABRT, &act, NULL);
879 #endif
880 abort();
883 #if !defined(CONFIG_USER_ONLY)
884 /* Called from RCU critical section */
885 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
887 RAMBlock *block;
889 block = atomic_rcu_read(&ram_list.mru_block);
890 if (block && addr - block->offset < block->max_length) {
891 return block;
893 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
894 if (addr - block->offset < block->max_length) {
895 goto found;
899 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
900 abort();
902 found:
903 /* It is safe to write mru_block outside the iothread lock. This
904 * is what happens:
906 * mru_block = xxx
907 * rcu_read_unlock()
908 * xxx removed from list
909 * rcu_read_lock()
910 * read mru_block
911 * mru_block = NULL;
912 * call_rcu(reclaim_ramblock, xxx);
913 * rcu_read_unlock()
915 * atomic_rcu_set is not needed here. The block was already published
916 * when it was placed into the list. Here we're just making an extra
917 * copy of the pointer.
919 ram_list.mru_block = block;
920 return block;
923 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
925 CPUState *cpu;
926 ram_addr_t start1;
927 RAMBlock *block;
928 ram_addr_t end;
930 end = TARGET_PAGE_ALIGN(start + length);
931 start &= TARGET_PAGE_MASK;
933 rcu_read_lock();
934 block = qemu_get_ram_block(start);
935 assert(block == qemu_get_ram_block(end - 1));
936 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
937 CPU_FOREACH(cpu) {
938 tlb_reset_dirty(cpu, start1, length);
940 rcu_read_unlock();
943 /* Note: start and end must be within the same ram block. */
944 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
945 ram_addr_t length,
946 unsigned client)
948 unsigned long end, page;
949 bool dirty;
951 if (length == 0) {
952 return false;
955 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
956 page = start >> TARGET_PAGE_BITS;
957 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
958 page, end - page);
960 if (dirty && tcg_enabled()) {
961 tlb_reset_dirty_range_all(start, length);
964 return dirty;
967 /* Called from RCU critical section */
968 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
969 MemoryRegionSection *section,
970 target_ulong vaddr,
971 hwaddr paddr, hwaddr xlat,
972 int prot,
973 target_ulong *address)
975 hwaddr iotlb;
976 CPUWatchpoint *wp;
978 if (memory_region_is_ram(section->mr)) {
979 /* Normal RAM. */
980 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
981 + xlat;
982 if (!section->readonly) {
983 iotlb |= PHYS_SECTION_NOTDIRTY;
984 } else {
985 iotlb |= PHYS_SECTION_ROM;
987 } else {
988 AddressSpaceDispatch *d;
990 d = atomic_rcu_read(&section->address_space->dispatch);
991 iotlb = section - d->map.sections;
992 iotlb += xlat;
995 /* Make accesses to pages with watchpoints go via the
996 watchpoint trap routines. */
997 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
998 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
999 /* Avoid trapping reads of pages with a write breakpoint. */
1000 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1001 iotlb = PHYS_SECTION_WATCH + paddr;
1002 *address |= TLB_MMIO;
1003 break;
1008 return iotlb;
#endif /* !defined(CONFIG_USER_ONLY) */
1012 #if !defined(CONFIG_USER_ONLY)
1014 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1015 uint16_t section);
1016 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1018 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1019 qemu_anon_ram_alloc;
1022 * Set a custom physical guest memory alloator.
1023 * Accelerators with unusual needs may need this. Hopefully, we can
1024 * get rid of it eventually.
1026 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1028 phys_mem_alloc = alloc;
1031 static uint16_t phys_section_add(PhysPageMap *map,
1032 MemoryRegionSection *section)
1034 /* The physical section number is ORed with a page-aligned
1035 * pointer to produce the iotlb entries. Thus it should
1036 * never overflow into the page-aligned value.
1038 assert(map->sections_nb < TARGET_PAGE_SIZE);
1040 if (map->sections_nb == map->sections_nb_alloc) {
1041 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1042 map->sections = g_renew(MemoryRegionSection, map->sections,
1043 map->sections_nb_alloc);
1045 map->sections[map->sections_nb] = *section;
1046 memory_region_ref(section->mr);
1047 return map->sections_nb++;
1050 static void phys_section_destroy(MemoryRegion *mr)
1052 bool have_sub_page = mr->subpage;
1054 memory_region_unref(mr);
1056 if (have_sub_page) {
1057 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1058 object_unref(OBJECT(&subpage->iomem));
1059 g_free(subpage);
1063 static void phys_sections_free(PhysPageMap *map)
1065 while (map->sections_nb > 0) {
1066 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1067 phys_section_destroy(section->mr);
1069 g_free(map->sections);
1070 g_free(map->nodes);
1073 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1075 subpage_t *subpage;
1076 hwaddr base = section->offset_within_address_space
1077 & TARGET_PAGE_MASK;
1078 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1079 d->map.nodes, d->map.sections);
1080 MemoryRegionSection subsection = {
1081 .offset_within_address_space = base,
1082 .size = int128_make64(TARGET_PAGE_SIZE),
1084 hwaddr start, end;
1086 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1088 if (!(existing->mr->subpage)) {
1089 subpage = subpage_init(d->as, base);
1090 subsection.address_space = d->as;
1091 subsection.mr = &subpage->iomem;
1092 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1093 phys_section_add(&d->map, &subsection));
1094 } else {
1095 subpage = container_of(existing->mr, subpage_t, iomem);
1097 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1098 end = start + int128_get64(section->size) - 1;
1099 subpage_register(subpage, start, end,
1100 phys_section_add(&d->map, section));
1104 static void register_multipage(AddressSpaceDispatch *d,
1105 MemoryRegionSection *section)
1107 hwaddr start_addr = section->offset_within_address_space;
1108 uint16_t section_index = phys_section_add(&d->map, section);
1109 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1110 TARGET_PAGE_BITS));
1112 assert(num_pages);
1113 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1116 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1118 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1119 AddressSpaceDispatch *d = as->next_dispatch;
1120 MemoryRegionSection now = *section, remain = *section;
1121 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1123 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1124 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1125 - now.offset_within_address_space;
1127 now.size = int128_min(int128_make64(left), now.size);
1128 register_subpage(d, &now);
1129 } else {
1130 now.size = int128_zero();
1132 while (int128_ne(remain.size, now.size)) {
1133 remain.size = int128_sub(remain.size, now.size);
1134 remain.offset_within_address_space += int128_get64(now.size);
1135 remain.offset_within_region += int128_get64(now.size);
1136 now = remain;
1137 if (int128_lt(remain.size, page_size)) {
1138 register_subpage(d, &now);
1139 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1140 now.size = page_size;
1141 register_subpage(d, &now);
1142 } else {
1143 now.size = int128_and(now.size, int128_neg(page_size));
1144 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does something with KVM. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1155 void qemu_mutex_lock_ramlist(void)
1157 qemu_mutex_lock(&ram_list.mutex);
1160 void qemu_mutex_unlock_ramlist(void)
1162 qemu_mutex_unlock(&ram_list.mutex);
1165 #ifdef __linux__
1167 #include <sys/vfs.h>
1169 #define HUGETLBFS_MAGIC 0x958458f6
1171 static long gethugepagesize(const char *path, Error **errp)
1173 struct statfs fs;
1174 int ret;
1176 do {
1177 ret = statfs(path, &fs);
1178 } while (ret != 0 && errno == EINTR);
1180 if (ret != 0) {
1181 error_setg_errno(errp, errno, "failed to get page size of file %s",
1182 path);
1183 return 0;
1186 return fs.f_bsize;
1189 static void *file_ram_alloc(RAMBlock *block,
1190 ram_addr_t memory,
1191 const char *path,
1192 Error **errp)
1194 struct stat st;
1195 char *filename;
1196 char *sanitized_name;
1197 char *c;
1198 void *area;
1199 int fd;
1200 uint64_t hpagesize;
1201 Error *local_err = NULL;
1203 hpagesize = gethugepagesize(path, &local_err);
1204 if (local_err) {
1205 error_propagate(errp, local_err);
1206 goto error;
1208 block->mr->align = hpagesize;
1210 if (memory < hpagesize) {
1211 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1212 "or larger than huge page size 0x%" PRIx64,
1213 memory, hpagesize);
1214 goto error;
1217 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1218 error_setg(errp,
1219 "host lacks kvm mmu notifiers, -mem-path unsupported");
1220 goto error;
1223 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1224 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1225 sanitized_name = g_strdup(memory_region_name(block->mr));
1226 for (c = sanitized_name; *c != '\0'; c++) {
1227 if (*c == '/') {
1228 *c = '_';
1232 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1233 sanitized_name);
1234 g_free(sanitized_name);
1236 fd = mkstemp(filename);
1237 if (fd >= 0) {
1238 unlink(filename);
1240 g_free(filename);
1241 } else {
1242 fd = open(path, O_RDWR | O_CREAT, 0644);
1245 if (fd < 0) {
1246 error_setg_errno(errp, errno,
1247 "unable to create backing store for hugepages");
1248 goto error;
1251 memory = ROUND_UP(memory, hpagesize);
1254 * ftruncate is not supported by hugetlbfs in older
1255 * hosts, so don't bother bailing out on errors.
1256 * If anything goes wrong with it under other filesystems,
1257 * mmap will fail.
1259 if (ftruncate(fd, memory)) {
1260 perror("ftruncate");
1263 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1264 if (area == MAP_FAILED) {
1265 error_setg_errno(errp, errno,
1266 "unable to map backing store for hugepages");
1267 close(fd);
1268 goto error;
1271 if (mem_prealloc) {
1272 os_mem_prealloc(fd, area, memory);
1275 block->fd = fd;
1276 return area;
1278 error:
1279 return NULL;
1281 #endif
1283 /* Called with the ramlist lock held. */
1284 static ram_addr_t find_ram_offset(ram_addr_t size)
1286 RAMBlock *block, *next_block;
1287 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1289 assert(size != 0); /* it would hand out same offset multiple times */
1291 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1292 return 0;
1295 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1296 ram_addr_t end, next = RAM_ADDR_MAX;
1298 end = block->offset + block->max_length;
1300 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1301 if (next_block->offset >= end) {
1302 next = MIN(next, next_block->offset);
1305 if (next - end >= size && next - end < mingap) {
1306 offset = end;
1307 mingap = next - end;
1311 if (offset == RAM_ADDR_MAX) {
1312 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1313 (uint64_t)size);
1314 abort();
1317 return offset;
1320 ram_addr_t last_ram_offset(void)
1322 RAMBlock *block;
1323 ram_addr_t last = 0;
1325 rcu_read_lock();
1326 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1327 last = MAX(last, block->offset + block->max_length);
1329 rcu_read_unlock();
1330 return last;
1333 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1335 int ret;
1337 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1338 if (!machine_dump_guest_core(current_machine)) {
1339 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1340 if (ret) {
1341 perror("qemu_madvise");
1342 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1343 "but dump_guest_core=off specified\n");
1348 /* Called within an RCU critical section, or while the ramlist lock
1349 * is held.
1351 static RAMBlock *find_ram_block(ram_addr_t addr)
1353 RAMBlock *block;
1355 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1356 if (block->offset == addr) {
1357 return block;
1361 return NULL;
1364 const char *qemu_ram_get_idstr(RAMBlock *rb)
1366 return rb->idstr;
1369 /* Called with iothread lock held. */
1370 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1372 RAMBlock *new_block, *block;
1374 rcu_read_lock();
1375 new_block = find_ram_block(addr);
1376 assert(new_block);
1377 assert(!new_block->idstr[0]);
1379 if (dev) {
1380 char *id = qdev_get_dev_path(dev);
1381 if (id) {
1382 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1383 g_free(id);
1386 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1388 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1389 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1390 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1391 new_block->idstr);
1392 abort();
1395 rcu_read_unlock();
1398 /* Called with iothread lock held. */
1399 void qemu_ram_unset_idstr(ram_addr_t addr)
1401 RAMBlock *block;
1403 /* FIXME: arch_init.c assumes that this is not called throughout
1404 * migration. Ignore the problem since hot-unplug during migration
1405 * does not work anyway.
1408 rcu_read_lock();
1409 block = find_ram_block(addr);
1410 if (block) {
1411 memset(block->idstr, 0, sizeof(block->idstr));
1413 rcu_read_unlock();
1416 static int memory_try_enable_merging(void *addr, size_t len)
1418 if (!machine_mem_merge(current_machine)) {
1419 /* disabled by the user */
1420 return 0;
1423 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1426 /* Only legal before guest might have detected the memory size: e.g. on
1427 * incoming migration, or right after reset.
1429 * As memory core doesn't know how is memory accessed, it is up to
1430 * resize callback to update device state and/or add assertions to detect
1431 * misuse, if necessary.
1433 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1435 RAMBlock *block = find_ram_block(base);
1437 assert(block);
1439 newsize = HOST_PAGE_ALIGN(newsize);
1441 if (block->used_length == newsize) {
1442 return 0;
1445 if (!(block->flags & RAM_RESIZEABLE)) {
1446 error_setg_errno(errp, EINVAL,
1447 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1448 " in != 0x" RAM_ADDR_FMT, block->idstr,
1449 newsize, block->used_length);
1450 return -EINVAL;
1453 if (block->max_length < newsize) {
1454 error_setg_errno(errp, EINVAL,
1455 "Length too large: %s: 0x" RAM_ADDR_FMT
1456 " > 0x" RAM_ADDR_FMT, block->idstr,
1457 newsize, block->max_length);
1458 return -EINVAL;
1461 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1462 block->used_length = newsize;
1463 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1464 DIRTY_CLIENTS_ALL);
1465 memory_region_set_size(block->mr, newsize);
1466 if (block->resized) {
1467 block->resized(block->idstr, newsize, block->host);
1469 return 0;
1472 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1474 RAMBlock *block;
1475 RAMBlock *last_block = NULL;
1476 ram_addr_t old_ram_size, new_ram_size;
1478 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1480 qemu_mutex_lock_ramlist();
1481 new_block->offset = find_ram_offset(new_block->max_length);
1483 if (!new_block->host) {
1484 if (xen_enabled()) {
1485 xen_ram_alloc(new_block->offset, new_block->max_length,
1486 new_block->mr);
1487 } else {
1488 new_block->host = phys_mem_alloc(new_block->max_length,
1489 &new_block->mr->align);
1490 if (!new_block->host) {
1491 error_setg_errno(errp, errno,
1492 "cannot set up guest memory '%s'",
1493 memory_region_name(new_block->mr));
1494 qemu_mutex_unlock_ramlist();
1495 return -1;
1497 memory_try_enable_merging(new_block->host, new_block->max_length);
1501 new_ram_size = MAX(old_ram_size,
1502 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1503 if (new_ram_size > old_ram_size) {
1504 migration_bitmap_extend(old_ram_size, new_ram_size);
1506 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1507 * QLIST (which has an RCU-friendly variant) does not have insertion at
1508 * tail, so save the last element in last_block.
1510 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1511 last_block = block;
1512 if (block->max_length < new_block->max_length) {
1513 break;
1516 if (block) {
1517 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1518 } else if (last_block) {
1519 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1520 } else { /* list is empty */
1521 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1523 ram_list.mru_block = NULL;
1525 /* Write list before version */
1526 smp_wmb();
1527 ram_list.version++;
1528 qemu_mutex_unlock_ramlist();
1530 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1532 if (new_ram_size > old_ram_size) {
1533 int i;
1535 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1536 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1537 ram_list.dirty_memory[i] =
1538 bitmap_zero_extend(ram_list.dirty_memory[i],
1539 old_ram_size, new_ram_size);
1542 cpu_physical_memory_set_dirty_range(new_block->offset,
1543 new_block->used_length,
1544 DIRTY_CLIENTS_ALL);
1546 if (new_block->host) {
1547 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1548 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1549 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1550 if (kvm_enabled()) {
1551 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1555 return new_block->offset;
1558 #ifdef __linux__
1559 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1560 bool share, const char *mem_path,
1561 Error **errp)
1563 RAMBlock *new_block;
1564 ram_addr_t addr;
1565 Error *local_err = NULL;
1567 if (xen_enabled()) {
1568 error_setg(errp, "-mem-path not supported with Xen");
1569 return -1;
1572 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1574 * file_ram_alloc() needs to allocate just like
1575 * phys_mem_alloc, but we haven't bothered to provide
1576 * a hook there.
1578 error_setg(errp,
1579 "-mem-path not supported with this accelerator");
1580 return -1;
1583 size = HOST_PAGE_ALIGN(size);
1584 new_block = g_malloc0(sizeof(*new_block));
1585 new_block->mr = mr;
1586 new_block->used_length = size;
1587 new_block->max_length = size;
1588 new_block->flags = share ? RAM_SHARED : 0;
1589 new_block->host = file_ram_alloc(new_block, size,
1590 mem_path, errp);
1591 if (!new_block->host) {
1592 g_free(new_block);
1593 return -1;
1596 addr = ram_block_add(new_block, &local_err);
1597 if (local_err) {
1598 g_free(new_block);
1599 error_propagate(errp, local_err);
1600 return -1;
1602 return addr;
1604 #endif
1606 static
1607 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1608 void (*resized)(const char*,
1609 uint64_t length,
1610 void *host),
1611 void *host, bool resizeable,
1612 MemoryRegion *mr, Error **errp)
1614 RAMBlock *new_block;
1615 ram_addr_t addr;
1616 Error *local_err = NULL;
1618 size = HOST_PAGE_ALIGN(size);
1619 max_size = HOST_PAGE_ALIGN(max_size);
1620 new_block = g_malloc0(sizeof(*new_block));
1621 new_block->mr = mr;
1622 new_block->resized = resized;
1623 new_block->used_length = size;
1624 new_block->max_length = max_size;
1625 assert(max_size >= size);
1626 new_block->fd = -1;
1627 new_block->host = host;
1628 if (host) {
1629 new_block->flags |= RAM_PREALLOC;
1631 if (resizeable) {
1632 new_block->flags |= RAM_RESIZEABLE;
1634 addr = ram_block_add(new_block, &local_err);
1635 if (local_err) {
1636 g_free(new_block);
1637 error_propagate(errp, local_err);
1638 return -1;
1640 return addr;
1643 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1644 MemoryRegion *mr, Error **errp)
1646 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1649 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1651 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1654 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1655 void (*resized)(const char*,
1656 uint64_t length,
1657 void *host),
1658 MemoryRegion *mr, Error **errp)
1660 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1663 static void reclaim_ramblock(RAMBlock *block)
1665 if (block->flags & RAM_PREALLOC) {
1667 } else if (xen_enabled()) {
1668 xen_invalidate_map_cache_entry(block->host);
1669 #ifndef _WIN32
1670 } else if (block->fd >= 0) {
1671 qemu_ram_munmap(block->host, block->max_length);
1672 close(block->fd);
1673 #endif
1674 } else {
1675 qemu_anon_ram_free(block->host, block->max_length);
1677 g_free(block);
1680 void qemu_ram_free(ram_addr_t addr)
1682 RAMBlock *block;
1684 qemu_mutex_lock_ramlist();
1685 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1686 if (addr == block->offset) {
1687 QLIST_REMOVE_RCU(block, next);
1688 ram_list.mru_block = NULL;
1689 /* Write list before version */
1690 smp_wmb();
1691 ram_list.version++;
1692 call_rcu(block, reclaim_ramblock, rcu);
1693 break;
1696 qemu_mutex_unlock_ramlist();
1699 #ifndef _WIN32
1700 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1702 RAMBlock *block;
1703 ram_addr_t offset;
1704 int flags;
1705 void *area, *vaddr;
1707 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1708 offset = addr - block->offset;
1709 if (offset < block->max_length) {
1710 vaddr = ramblock_ptr(block, offset);
1711 if (block->flags & RAM_PREALLOC) {
1713 } else if (xen_enabled()) {
1714 abort();
1715 } else {
1716 flags = MAP_FIXED;
1717 if (block->fd >= 0) {
1718 flags |= (block->flags & RAM_SHARED ?
1719 MAP_SHARED : MAP_PRIVATE);
1720 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1721 flags, block->fd, offset);
1722 } else {
1724 * Remap needs to match alloc. Accelerators that
1725 * set phys_mem_alloc never remap. If they did,
1726 * we'd need a remap hook here.
1728 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1730 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1731 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1732 flags, -1, 0);
1734 if (area != vaddr) {
1735 fprintf(stderr, "Could not remap addr: "
1736 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1737 length, addr);
1738 exit(1);
1740 memory_try_enable_merging(vaddr, length);
1741 qemu_ram_setup_dump(vaddr, length);
1746 #endif /* !_WIN32 */
1748 int qemu_get_ram_fd(ram_addr_t addr)
1750 RAMBlock *block;
1751 int fd;
1753 rcu_read_lock();
1754 block = qemu_get_ram_block(addr);
1755 fd = block->fd;
1756 rcu_read_unlock();
1757 return fd;
1760 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1762 RAMBlock *block;
1764 rcu_read_lock();
1765 block = qemu_get_ram_block(addr);
1766 block->fd = fd;
1767 rcu_read_unlock();
1770 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1772 RAMBlock *block;
1773 void *ptr;
1775 rcu_read_lock();
1776 block = qemu_get_ram_block(addr);
1777 ptr = ramblock_ptr(block, 0);
1778 rcu_read_unlock();
1779 return ptr;
1782 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1783 * This should not be used for general purpose DMA. Use address_space_map
1784 * or address_space_rw instead. For local memory (e.g. video ram) that the
1785 * device owns, use memory_region_get_ram_ptr.
1787 * Called within RCU critical section.
1789 void *qemu_get_ram_ptr(ram_addr_t addr)
1791 RAMBlock *block = qemu_get_ram_block(addr);
1793 if (xen_enabled() && block->host == NULL) {
1794 /* We need to check if the requested address is in the RAM
1795 * because we don't want to map the entire memory in QEMU.
1796 * In that case just map until the end of the page.
1798 if (block->offset == 0) {
1799 return xen_map_cache(addr, 0, 0);
1802 block->host = xen_map_cache(block->offset, block->max_length, 1);
1804 return ramblock_ptr(block, addr - block->offset);
1807 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1808 * but takes a size argument.
1810 * Called within RCU critical section.
1812 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1814 RAMBlock *block;
1815 ram_addr_t offset_inside_block;
1816 if (*size == 0) {
1817 return NULL;
1820 block = qemu_get_ram_block(addr);
1821 offset_inside_block = addr - block->offset;
1822 *size = MIN(*size, block->max_length - offset_inside_block);
1824 if (xen_enabled() && block->host == NULL) {
1825 /* We need to check if the requested address is in the RAM
1826 * because we don't want to map the entire memory in QEMU.
1827 * In that case just map the requested area.
1829 if (block->offset == 0) {
1830 return xen_map_cache(addr, *size, 1);
1833 block->host = xen_map_cache(block->offset, block->max_length, 1);
1836 return ramblock_ptr(block, offset_inside_block);
1840 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1841 * in that RAMBlock.
1843 * ptr: Host pointer to look up
1844 * round_offset: If true round the result offset down to a page boundary
1845 * *ram_addr: set to result ram_addr
1846 * *offset: set to result offset within the RAMBlock
1848 * Returns: RAMBlock (or NULL if not found)
1850 * By the time this function returns, the returned pointer is not protected
1851 * by RCU anymore. If the caller is not within an RCU critical section and
1852 * does not hold the iothread lock, it must have other means of protecting the
1853 * pointer, such as a reference to the region that includes the incoming
1854 * ram_addr_t.
1856 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1857 ram_addr_t *ram_addr,
1858 ram_addr_t *offset)
1860 RAMBlock *block;
1861 uint8_t *host = ptr;
1863 if (xen_enabled()) {
1864 rcu_read_lock();
1865 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1866 block = qemu_get_ram_block(*ram_addr);
1867 if (block) {
1868 *offset = (host - block->host);
1870 rcu_read_unlock();
1871 return block;
1874 rcu_read_lock();
1875 block = atomic_rcu_read(&ram_list.mru_block);
1876 if (block && block->host && host - block->host < block->max_length) {
1877 goto found;
1880 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1881 /* This case append when the block is not mapped. */
1882 if (block->host == NULL) {
1883 continue;
1885 if (host - block->host < block->max_length) {
1886 goto found;
1890 rcu_read_unlock();
1891 return NULL;
1893 found:
1894 *offset = (host - block->host);
1895 if (round_offset) {
1896 *offset &= TARGET_PAGE_MASK;
1898 *ram_addr = block->offset + *offset;
1899 rcu_read_unlock();
1900 return block;
1904 * Finds the named RAMBlock
1906 * name: The name of RAMBlock to find
1908 * Returns: RAMBlock (or NULL if not found)
1910 RAMBlock *qemu_ram_block_by_name(const char *name)
1912 RAMBlock *block;
1914 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1915 if (!strcmp(name, block->idstr)) {
1916 return block;
1920 return NULL;
1923 /* Some of the softmmu routines need to translate from a host pointer
1924 (typically a TLB entry) back to a ram offset. */
1925 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1927 RAMBlock *block;
1928 ram_addr_t offset; /* Not used */
1930 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1932 if (!block) {
1933 return NULL;
1936 return block->mr;
1939 /* Called within RCU critical section. */
1940 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1941 uint64_t val, unsigned size)
1943 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1944 tb_invalidate_phys_page_fast(ram_addr, size);
1946 switch (size) {
1947 case 1:
1948 stb_p(qemu_get_ram_ptr(ram_addr), val);
1949 break;
1950 case 2:
1951 stw_p(qemu_get_ram_ptr(ram_addr), val);
1952 break;
1953 case 4:
1954 stl_p(qemu_get_ram_ptr(ram_addr), val);
1955 break;
1956 default:
1957 abort();
1959 /* Set both VGA and migration bits for simplicity and to remove
1960 * the notdirty callback faster.
1962 cpu_physical_memory_set_dirty_range(ram_addr, size,
1963 DIRTY_CLIENTS_NOCODE);
1964 /* we remove the notdirty callback only if the code has been
1965 flushed */
1966 if (!cpu_physical_memory_is_clean(ram_addr)) {
1967 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1971 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1972 unsigned size, bool is_write)
1974 return is_write;
1977 static const MemoryRegionOps notdirty_mem_ops = {
1978 .write = notdirty_mem_write,
1979 .valid.accepts = notdirty_mem_accepts,
1980 .endianness = DEVICE_NATIVE_ENDIAN,
1983 /* Generate a debug exception if a watchpoint has been hit. */
1984 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1986 CPUState *cpu = current_cpu;
1987 CPUArchState *env = cpu->env_ptr;
1988 target_ulong pc, cs_base;
1989 target_ulong vaddr;
1990 CPUWatchpoint *wp;
1991 int cpu_flags;
1993 if (cpu->watchpoint_hit) {
1994 /* We re-entered the check after replacing the TB. Now raise
1995 * the debug interrupt so that is will trigger after the
1996 * current instruction. */
1997 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1998 return;
2000 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2001 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2002 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2003 && (wp->flags & flags)) {
2004 if (flags == BP_MEM_READ) {
2005 wp->flags |= BP_WATCHPOINT_HIT_READ;
2006 } else {
2007 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2009 wp->hitaddr = vaddr;
2010 wp->hitattrs = attrs;
2011 if (!cpu->watchpoint_hit) {
2012 cpu->watchpoint_hit = wp;
2013 tb_check_watchpoint(cpu);
2014 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2015 cpu->exception_index = EXCP_DEBUG;
2016 cpu_loop_exit(cpu);
2017 } else {
2018 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2019 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2020 cpu_resume_from_signal(cpu, NULL);
2023 } else {
2024 wp->flags &= ~BP_WATCHPOINT_HIT;
2029 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2030 so these check for a hit then pass through to the normal out-of-line
2031 phys routines. */
2032 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2033 unsigned size, MemTxAttrs attrs)
2035 MemTxResult res;
2036 uint64_t data;
2038 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2039 switch (size) {
2040 case 1:
2041 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2042 break;
2043 case 2:
2044 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2045 break;
2046 case 4:
2047 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2048 break;
2049 default: abort();
2051 *pdata = data;
2052 return res;
2055 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2056 uint64_t val, unsigned size,
2057 MemTxAttrs attrs)
2059 MemTxResult res;
2061 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2062 switch (size) {
2063 case 1:
2064 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2065 break;
2066 case 2:
2067 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2068 break;
2069 case 4:
2070 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2071 break;
2072 default: abort();
2074 return res;
2077 static const MemoryRegionOps watch_mem_ops = {
2078 .read_with_attrs = watch_mem_read,
2079 .write_with_attrs = watch_mem_write,
2080 .endianness = DEVICE_NATIVE_ENDIAN,
2083 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2084 unsigned len, MemTxAttrs attrs)
2086 subpage_t *subpage = opaque;
2087 uint8_t buf[8];
2088 MemTxResult res;
2090 #if defined(DEBUG_SUBPAGE)
2091 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2092 subpage, len, addr);
2093 #endif
2094 res = address_space_read(subpage->as, addr + subpage->base,
2095 attrs, buf, len);
2096 if (res) {
2097 return res;
2099 switch (len) {
2100 case 1:
2101 *data = ldub_p(buf);
2102 return MEMTX_OK;
2103 case 2:
2104 *data = lduw_p(buf);
2105 return MEMTX_OK;
2106 case 4:
2107 *data = ldl_p(buf);
2108 return MEMTX_OK;
2109 case 8:
2110 *data = ldq_p(buf);
2111 return MEMTX_OK;
2112 default:
2113 abort();
2117 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2118 uint64_t value, unsigned len, MemTxAttrs attrs)
2120 subpage_t *subpage = opaque;
2121 uint8_t buf[8];
2123 #if defined(DEBUG_SUBPAGE)
2124 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2125 " value %"PRIx64"\n",
2126 __func__, subpage, len, addr, value);
2127 #endif
2128 switch (len) {
2129 case 1:
2130 stb_p(buf, value);
2131 break;
2132 case 2:
2133 stw_p(buf, value);
2134 break;
2135 case 4:
2136 stl_p(buf, value);
2137 break;
2138 case 8:
2139 stq_p(buf, value);
2140 break;
2141 default:
2142 abort();
2144 return address_space_write(subpage->as, addr + subpage->base,
2145 attrs, buf, len);
2148 static bool subpage_accepts(void *opaque, hwaddr addr,
2149 unsigned len, bool is_write)
2151 subpage_t *subpage = opaque;
2152 #if defined(DEBUG_SUBPAGE)
2153 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2154 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2155 #endif
2157 return address_space_access_valid(subpage->as, addr + subpage->base,
2158 len, is_write);
2161 static const MemoryRegionOps subpage_ops = {
2162 .read_with_attrs = subpage_read,
2163 .write_with_attrs = subpage_write,
2164 .impl.min_access_size = 1,
2165 .impl.max_access_size = 8,
2166 .valid.min_access_size = 1,
2167 .valid.max_access_size = 8,
2168 .valid.accepts = subpage_accepts,
2169 .endianness = DEVICE_NATIVE_ENDIAN,
2172 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2173 uint16_t section)
2175 int idx, eidx;
2177 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2178 return -1;
2179 idx = SUBPAGE_IDX(start);
2180 eidx = SUBPAGE_IDX(end);
2181 #if defined(DEBUG_SUBPAGE)
2182 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2183 __func__, mmio, start, end, idx, eidx, section);
2184 #endif
2185 for (; idx <= eidx; idx++) {
2186 mmio->sub_section[idx] = section;
2189 return 0;
2192 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2194 subpage_t *mmio;
2196 mmio = g_malloc0(sizeof(subpage_t));
2198 mmio->as = as;
2199 mmio->base = base;
2200 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2201 NULL, TARGET_PAGE_SIZE);
2202 mmio->iomem.subpage = true;
2203 #if defined(DEBUG_SUBPAGE)
2204 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2205 mmio, base, TARGET_PAGE_SIZE);
2206 #endif
2207 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2209 return mmio;
2212 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2213 MemoryRegion *mr)
2215 assert(as);
2216 MemoryRegionSection section = {
2217 .address_space = as,
2218 .mr = mr,
2219 .offset_within_address_space = 0,
2220 .offset_within_region = 0,
2221 .size = int128_2_64(),
2224 return phys_section_add(map, &section);
2227 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2229 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2230 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2231 MemoryRegionSection *sections = d->map.sections;
2233 return sections[index & ~TARGET_PAGE_MASK].mr;
2236 static void io_mem_init(void)
2238 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2239 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2240 NULL, UINT64_MAX);
2241 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2242 NULL, UINT64_MAX);
2243 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2244 NULL, UINT64_MAX);
2247 static void mem_begin(MemoryListener *listener)
2249 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2250 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2251 uint16_t n;
2253 n = dummy_section(&d->map, as, &io_mem_unassigned);
2254 assert(n == PHYS_SECTION_UNASSIGNED);
2255 n = dummy_section(&d->map, as, &io_mem_notdirty);
2256 assert(n == PHYS_SECTION_NOTDIRTY);
2257 n = dummy_section(&d->map, as, &io_mem_rom);
2258 assert(n == PHYS_SECTION_ROM);
2259 n = dummy_section(&d->map, as, &io_mem_watch);
2260 assert(n == PHYS_SECTION_WATCH);
2262 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2263 d->as = as;
2264 as->next_dispatch = d;
2267 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2269 phys_sections_free(&d->map);
2270 g_free(d);
2273 static void mem_commit(MemoryListener *listener)
2275 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2276 AddressSpaceDispatch *cur = as->dispatch;
2277 AddressSpaceDispatch *next = as->next_dispatch;
2279 phys_page_compact_all(next, next->map.nodes_nb);
2281 atomic_rcu_set(&as->dispatch, next);
2282 if (cur) {
2283 call_rcu(cur, address_space_dispatch_free, rcu);
2287 static void tcg_commit(MemoryListener *listener)
2289 CPUAddressSpace *cpuas;
2290 AddressSpaceDispatch *d;
2292 /* since each CPU stores ram addresses in its TLB cache, we must
2293 reset the modified entries */
2294 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2295 cpu_reloading_memory_map();
2296 /* The CPU and TLB are protected by the iothread lock.
2297 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2298 * may have split the RCU critical section.
2300 d = atomic_rcu_read(&cpuas->as->dispatch);
2301 cpuas->memory_dispatch = d;
2302 tlb_flush(cpuas->cpu, 1);
2305 void address_space_init_dispatch(AddressSpace *as)
2307 as->dispatch = NULL;
2308 as->dispatch_listener = (MemoryListener) {
2309 .begin = mem_begin,
2310 .commit = mem_commit,
2311 .region_add = mem_add,
2312 .region_nop = mem_add,
2313 .priority = 0,
2315 memory_listener_register(&as->dispatch_listener, as);
2318 void address_space_unregister(AddressSpace *as)
2320 memory_listener_unregister(&as->dispatch_listener);
2323 void address_space_destroy_dispatch(AddressSpace *as)
2325 AddressSpaceDispatch *d = as->dispatch;
2327 atomic_rcu_set(&as->dispatch, NULL);
2328 if (d) {
2329 call_rcu(d, address_space_dispatch_free, rcu);
2333 static void memory_map_init(void)
2335 system_memory = g_malloc(sizeof(*system_memory));
2337 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2338 address_space_init(&address_space_memory, system_memory, "memory");
2340 system_io = g_malloc(sizeof(*system_io));
2341 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2342 65536);
2343 address_space_init(&address_space_io, system_io, "I/O");
2346 MemoryRegion *get_system_memory(void)
2348 return system_memory;
2351 MemoryRegion *get_system_io(void)
2353 return system_io;
2356 #endif /* !defined(CONFIG_USER_ONLY) */
2358 /* physical memory access (slow version, mainly for debug) */
2359 #if defined(CONFIG_USER_ONLY)
2360 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2361 uint8_t *buf, int len, int is_write)
2363 int l, flags;
2364 target_ulong page;
2365 void * p;
2367 while (len > 0) {
2368 page = addr & TARGET_PAGE_MASK;
2369 l = (page + TARGET_PAGE_SIZE) - addr;
2370 if (l > len)
2371 l = len;
2372 flags = page_get_flags(page);
2373 if (!(flags & PAGE_VALID))
2374 return -1;
2375 if (is_write) {
2376 if (!(flags & PAGE_WRITE))
2377 return -1;
2378 /* XXX: this code should not depend on lock_user */
2379 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2380 return -1;
2381 memcpy(p, buf, l);
2382 unlock_user(p, addr, l);
2383 } else {
2384 if (!(flags & PAGE_READ))
2385 return -1;
2386 /* XXX: this code should not depend on lock_user */
2387 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2388 return -1;
2389 memcpy(buf, p, l);
2390 unlock_user(p, addr, 0);
2392 len -= l;
2393 buf += l;
2394 addr += l;
2396 return 0;
2399 #else
2401 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2402 hwaddr length)
2404 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2405 /* No early return if dirty_log_mask is or becomes 0, because
2406 * cpu_physical_memory_set_dirty_range will still call
2407 * xen_modified_memory.
2409 if (dirty_log_mask) {
2410 dirty_log_mask =
2411 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2413 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2414 tb_invalidate_phys_range(addr, addr + length);
2415 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2417 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2420 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2422 unsigned access_size_max = mr->ops->valid.max_access_size;
2424 /* Regions are assumed to support 1-4 byte accesses unless
2425 otherwise specified. */
2426 if (access_size_max == 0) {
2427 access_size_max = 4;
2430 /* Bound the maximum access by the alignment of the address. */
2431 if (!mr->ops->impl.unaligned) {
2432 unsigned align_size_max = addr & -addr;
2433 if (align_size_max != 0 && align_size_max < access_size_max) {
2434 access_size_max = align_size_max;
2438 /* Don't attempt accesses larger than the maximum. */
2439 if (l > access_size_max) {
2440 l = access_size_max;
2442 l = pow2floor(l);
2444 return l;
2447 static bool prepare_mmio_access(MemoryRegion *mr)
2449 bool unlocked = !qemu_mutex_iothread_locked();
2450 bool release_lock = false;
2452 if (unlocked && mr->global_locking) {
2453 qemu_mutex_lock_iothread();
2454 unlocked = false;
2455 release_lock = true;
2457 if (mr->flush_coalesced_mmio) {
2458 if (unlocked) {
2459 qemu_mutex_lock_iothread();
2461 qemu_flush_coalesced_mmio_buffer();
2462 if (unlocked) {
2463 qemu_mutex_unlock_iothread();
2467 return release_lock;
2470 /* Called within RCU critical section. */
2471 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2472 MemTxAttrs attrs,
2473 const uint8_t *buf,
2474 int len, hwaddr addr1,
2475 hwaddr l, MemoryRegion *mr)
2477 uint8_t *ptr;
2478 uint64_t val;
2479 MemTxResult result = MEMTX_OK;
2480 bool release_lock = false;
2482 for (;;) {
2483 if (!memory_access_is_direct(mr, true)) {
2484 release_lock |= prepare_mmio_access(mr);
2485 l = memory_access_size(mr, l, addr1);
2486 /* XXX: could force current_cpu to NULL to avoid
2487 potential bugs */
2488 switch (l) {
2489 case 8:
2490 /* 64 bit write access */
2491 val = ldq_p(buf);
2492 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2493 attrs);
2494 break;
2495 case 4:
2496 /* 32 bit write access */
2497 val = ldl_p(buf);
2498 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2499 attrs);
2500 break;
2501 case 2:
2502 /* 16 bit write access */
2503 val = lduw_p(buf);
2504 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2505 attrs);
2506 break;
2507 case 1:
2508 /* 8 bit write access */
2509 val = ldub_p(buf);
2510 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2511 attrs);
2512 break;
2513 default:
2514 abort();
2516 } else {
2517 addr1 += memory_region_get_ram_addr(mr);
2518 /* RAM case */
2519 ptr = qemu_get_ram_ptr(addr1);
2520 memcpy(ptr, buf, l);
2521 invalidate_and_set_dirty(mr, addr1, l);
2524 if (release_lock) {
2525 qemu_mutex_unlock_iothread();
2526 release_lock = false;
2529 len -= l;
2530 buf += l;
2531 addr += l;
2533 if (!len) {
2534 break;
2537 l = len;
2538 mr = address_space_translate(as, addr, &addr1, &l, true);
2541 return result;
2544 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2545 const uint8_t *buf, int len)
2547 hwaddr l;
2548 hwaddr addr1;
2549 MemoryRegion *mr;
2550 MemTxResult result = MEMTX_OK;
2552 if (len > 0) {
2553 rcu_read_lock();
2554 l = len;
2555 mr = address_space_translate(as, addr, &addr1, &l, true);
2556 result = address_space_write_continue(as, addr, attrs, buf, len,
2557 addr1, l, mr);
2558 rcu_read_unlock();
2561 return result;
2564 /* Called within RCU critical section. */
2565 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2566 MemTxAttrs attrs, uint8_t *buf,
2567 int len, hwaddr addr1, hwaddr l,
2568 MemoryRegion *mr)
2570 uint8_t *ptr;
2571 uint64_t val;
2572 MemTxResult result = MEMTX_OK;
2573 bool release_lock = false;
2575 for (;;) {
2576 if (!memory_access_is_direct(mr, false)) {
2577 /* I/O case */
2578 release_lock |= prepare_mmio_access(mr);
2579 l = memory_access_size(mr, l, addr1);
2580 switch (l) {
2581 case 8:
2582 /* 64 bit read access */
2583 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2584 attrs);
2585 stq_p(buf, val);
2586 break;
2587 case 4:
2588 /* 32 bit read access */
2589 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2590 attrs);
2591 stl_p(buf, val);
2592 break;
2593 case 2:
2594 /* 16 bit read access */
2595 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2596 attrs);
2597 stw_p(buf, val);
2598 break;
2599 case 1:
2600 /* 8 bit read access */
2601 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2602 attrs);
2603 stb_p(buf, val);
2604 break;
2605 default:
2606 abort();
2608 } else {
2609 /* RAM case */
2610 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2611 memcpy(buf, ptr, l);
2614 if (release_lock) {
2615 qemu_mutex_unlock_iothread();
2616 release_lock = false;
2619 len -= l;
2620 buf += l;
2621 addr += l;
2623 if (!len) {
2624 break;
2627 l = len;
2628 mr = address_space_translate(as, addr, &addr1, &l, false);
2631 return result;
2634 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2635 MemTxAttrs attrs, uint8_t *buf, int len)
2637 hwaddr l;
2638 hwaddr addr1;
2639 MemoryRegion *mr;
2640 MemTxResult result = MEMTX_OK;
2642 if (len > 0) {
2643 rcu_read_lock();
2644 l = len;
2645 mr = address_space_translate(as, addr, &addr1, &l, false);
2646 result = address_space_read_continue(as, addr, attrs, buf, len,
2647 addr1, l, mr);
2648 rcu_read_unlock();
2651 return result;
2654 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2655 uint8_t *buf, int len, bool is_write)
2657 if (is_write) {
2658 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2659 } else {
2660 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2664 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2665 int len, int is_write)
2667 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2668 buf, len, is_write);
/* Operation selected in cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2676 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2677 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2679 hwaddr l;
2680 uint8_t *ptr;
2681 hwaddr addr1;
2682 MemoryRegion *mr;
2684 rcu_read_lock();
2685 while (len > 0) {
2686 l = len;
2687 mr = address_space_translate(as, addr, &addr1, &l, true);
2689 if (!(memory_region_is_ram(mr) ||
2690 memory_region_is_romd(mr))) {
2691 l = memory_access_size(mr, l, addr1);
2692 } else {
2693 addr1 += memory_region_get_ram_addr(mr);
2694 /* ROM/RAM case */
2695 ptr = qemu_get_ram_ptr(addr1);
2696 switch (type) {
2697 case WRITE_DATA:
2698 memcpy(ptr, buf, l);
2699 invalidate_and_set_dirty(mr, addr1, l);
2700 break;
2701 case FLUSH_CACHE:
2702 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2703 break;
2706 len -= l;
2707 buf += l;
2708 addr += l;
2710 rcu_read_unlock();
2713 /* used for ROM loading : can write in RAM and ROM */
2714 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2715 const uint8_t *buf, int len)
2717 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2720 void cpu_flush_icache_range(hwaddr start, int len)
2723 * This function should do the same thing as an icache flush that was
2724 * triggered from within the guest. For TCG we are always cache coherent,
2725 * so there is no need to flush anything. For KVM / Xen we need to flush
2726 * the host's instruction cache at least.
2728 if (tcg_enabled()) {
2729 return;
2732 cpu_physical_memory_write_rom_internal(&address_space_memory,
2733 start, NULL, len, FLUSH_CACHE);
2736 typedef struct {
2737 MemoryRegion *mr;
2738 void *buffer;
2739 hwaddr addr;
2740 hwaddr len;
2741 bool in_use;
2742 } BounceBuffer;
2744 static BounceBuffer bounce;
2746 typedef struct MapClient {
2747 QEMUBH *bh;
2748 QLIST_ENTRY(MapClient) link;
2749 } MapClient;
2751 QemuMutex map_client_list_lock;
2752 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2753 = QLIST_HEAD_INITIALIZER(map_client_list);
2755 static void cpu_unregister_map_client_do(MapClient *client)
2757 QLIST_REMOVE(client, link);
2758 g_free(client);
2761 static void cpu_notify_map_clients_locked(void)
2763 MapClient *client;
2765 while (!QLIST_EMPTY(&map_client_list)) {
2766 client = QLIST_FIRST(&map_client_list);
2767 qemu_bh_schedule(client->bh);
2768 cpu_unregister_map_client_do(client);
2772 void cpu_register_map_client(QEMUBH *bh)
2774 MapClient *client = g_malloc(sizeof(*client));
2776 qemu_mutex_lock(&map_client_list_lock);
2777 client->bh = bh;
2778 QLIST_INSERT_HEAD(&map_client_list, client, link);
2779 if (!atomic_read(&bounce.in_use)) {
2780 cpu_notify_map_clients_locked();
2782 qemu_mutex_unlock(&map_client_list_lock);
2785 void cpu_exec_init_all(void)
2787 qemu_mutex_init(&ram_list.mutex);
2788 io_mem_init();
2789 memory_map_init();
2790 qemu_mutex_init(&map_client_list_lock);
2793 void cpu_unregister_map_client(QEMUBH *bh)
2795 MapClient *client;
2797 qemu_mutex_lock(&map_client_list_lock);
2798 QLIST_FOREACH(client, &map_client_list, link) {
2799 if (client->bh == bh) {
2800 cpu_unregister_map_client_do(client);
2801 break;
2804 qemu_mutex_unlock(&map_client_list_lock);
2807 static void cpu_notify_map_clients(void)
2809 qemu_mutex_lock(&map_client_list_lock);
2810 cpu_notify_map_clients_locked();
2811 qemu_mutex_unlock(&map_client_list_lock);
2814 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2816 MemoryRegion *mr;
2817 hwaddr l, xlat;
2819 rcu_read_lock();
2820 while (len > 0) {
2821 l = len;
2822 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2823 if (!memory_access_is_direct(mr, is_write)) {
2824 l = memory_access_size(mr, l, addr);
2825 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2826 return false;
2830 len -= l;
2831 addr += l;
2833 rcu_read_unlock();
2834 return true;
2837 /* Map a physical memory region into a host virtual address.
2838 * May map a subset of the requested range, given by and returned in *plen.
2839 * May return NULL if resources needed to perform the mapping are exhausted.
2840 * Use only for reads OR writes - not for read-modify-write operations.
2841 * Use cpu_register_map_client() to know when retrying the map operation is
2842 * likely to succeed.
2844 void *address_space_map(AddressSpace *as,
2845 hwaddr addr,
2846 hwaddr *plen,
2847 bool is_write)
2849 hwaddr len = *plen;
2850 hwaddr done = 0;
2851 hwaddr l, xlat, base;
2852 MemoryRegion *mr, *this_mr;
2853 ram_addr_t raddr;
2854 void *ptr;
2856 if (len == 0) {
2857 return NULL;
2860 l = len;
2861 rcu_read_lock();
2862 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2864 if (!memory_access_is_direct(mr, is_write)) {
2865 if (atomic_xchg(&bounce.in_use, true)) {
2866 rcu_read_unlock();
2867 return NULL;
2869 /* Avoid unbounded allocations */
2870 l = MIN(l, TARGET_PAGE_SIZE);
2871 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2872 bounce.addr = addr;
2873 bounce.len = l;
2875 memory_region_ref(mr);
2876 bounce.mr = mr;
2877 if (!is_write) {
2878 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2879 bounce.buffer, l);
2882 rcu_read_unlock();
2883 *plen = l;
2884 return bounce.buffer;
2887 base = xlat;
2888 raddr = memory_region_get_ram_addr(mr);
2890 for (;;) {
2891 len -= l;
2892 addr += l;
2893 done += l;
2894 if (len == 0) {
2895 break;
2898 l = len;
2899 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2900 if (this_mr != mr || xlat != base + done) {
2901 break;
2905 memory_region_ref(mr);
2906 *plen = done;
2907 ptr = qemu_ram_ptr_length(raddr + base, plen);
2908 rcu_read_unlock();
2910 return ptr;
2913 /* Unmaps a memory region previously mapped by address_space_map().
2914 * Will also mark the memory as dirty if is_write == 1. access_len gives
2915 * the amount of memory that was actually read or written by the caller.
2917 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2918 int is_write, hwaddr access_len)
2920 if (buffer != bounce.buffer) {
2921 MemoryRegion *mr;
2922 ram_addr_t addr1;
2924 mr = qemu_ram_addr_from_host(buffer, &addr1);
2925 assert(mr != NULL);
2926 if (is_write) {
2927 invalidate_and_set_dirty(mr, addr1, access_len);
2929 if (xen_enabled()) {
2930 xen_invalidate_map_cache_entry(buffer);
2932 memory_region_unref(mr);
2933 return;
2935 if (is_write) {
2936 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2937 bounce.buffer, access_len);
2939 qemu_vfree(bounce.buffer);
2940 bounce.buffer = NULL;
2941 memory_region_unref(bounce.mr);
2942 atomic_mb_set(&bounce.in_use, false);
2943 cpu_notify_map_clients();
2946 void *cpu_physical_memory_map(hwaddr addr,
2947 hwaddr *plen,
2948 int is_write)
2950 return address_space_map(&address_space_memory, addr, plen, is_write);
2953 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2954 int is_write, hwaddr access_len)
2956 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2959 /* warning: addr must be aligned */
2960 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2961 MemTxAttrs attrs,
2962 MemTxResult *result,
2963 enum device_endian endian)
2965 uint8_t *ptr;
2966 uint64_t val;
2967 MemoryRegion *mr;
2968 hwaddr l = 4;
2969 hwaddr addr1;
2970 MemTxResult r;
2971 bool release_lock = false;
2973 rcu_read_lock();
2974 mr = address_space_translate(as, addr, &addr1, &l, false);
2975 if (l < 4 || !memory_access_is_direct(mr, false)) {
2976 release_lock |= prepare_mmio_access(mr);
2978 /* I/O case */
2979 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2980 #if defined(TARGET_WORDS_BIGENDIAN)
2981 if (endian == DEVICE_LITTLE_ENDIAN) {
2982 val = bswap32(val);
2984 #else
2985 if (endian == DEVICE_BIG_ENDIAN) {
2986 val = bswap32(val);
2988 #endif
2989 } else {
2990 /* RAM case */
2991 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2992 & TARGET_PAGE_MASK)
2993 + addr1);
2994 switch (endian) {
2995 case DEVICE_LITTLE_ENDIAN:
2996 val = ldl_le_p(ptr);
2997 break;
2998 case DEVICE_BIG_ENDIAN:
2999 val = ldl_be_p(ptr);
3000 break;
3001 default:
3002 val = ldl_p(ptr);
3003 break;
3005 r = MEMTX_OK;
3007 if (result) {
3008 *result = r;
3010 if (release_lock) {
3011 qemu_mutex_unlock_iothread();
3013 rcu_read_unlock();
3014 return val;
3017 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3018 MemTxAttrs attrs, MemTxResult *result)
3020 return address_space_ldl_internal(as, addr, attrs, result,
3021 DEVICE_NATIVE_ENDIAN);
3024 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3025 MemTxAttrs attrs, MemTxResult *result)
3027 return address_space_ldl_internal(as, addr, attrs, result,
3028 DEVICE_LITTLE_ENDIAN);
3031 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3032 MemTxAttrs attrs, MemTxResult *result)
3034 return address_space_ldl_internal(as, addr, attrs, result,
3035 DEVICE_BIG_ENDIAN);
3038 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3040 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3043 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3045 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3048 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3050 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3053 /* warning: addr must be aligned */
3054 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3055 MemTxAttrs attrs,
3056 MemTxResult *result,
3057 enum device_endian endian)
3059 uint8_t *ptr;
3060 uint64_t val;
3061 MemoryRegion *mr;
3062 hwaddr l = 8;
3063 hwaddr addr1;
3064 MemTxResult r;
3065 bool release_lock = false;
3067 rcu_read_lock();
3068 mr = address_space_translate(as, addr, &addr1, &l,
3069 false);
3070 if (l < 8 || !memory_access_is_direct(mr, false)) {
3071 release_lock |= prepare_mmio_access(mr);
3073 /* I/O case */
3074 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3075 #if defined(TARGET_WORDS_BIGENDIAN)
3076 if (endian == DEVICE_LITTLE_ENDIAN) {
3077 val = bswap64(val);
3079 #else
3080 if (endian == DEVICE_BIG_ENDIAN) {
3081 val = bswap64(val);
3083 #endif
3084 } else {
3085 /* RAM case */
3086 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3087 & TARGET_PAGE_MASK)
3088 + addr1);
3089 switch (endian) {
3090 case DEVICE_LITTLE_ENDIAN:
3091 val = ldq_le_p(ptr);
3092 break;
3093 case DEVICE_BIG_ENDIAN:
3094 val = ldq_be_p(ptr);
3095 break;
3096 default:
3097 val = ldq_p(ptr);
3098 break;
3100 r = MEMTX_OK;
3102 if (result) {
3103 *result = r;
3105 if (release_lock) {
3106 qemu_mutex_unlock_iothread();
3108 rcu_read_unlock();
3109 return val;
3112 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3113 MemTxAttrs attrs, MemTxResult *result)
3115 return address_space_ldq_internal(as, addr, attrs, result,
3116 DEVICE_NATIVE_ENDIAN);
3119 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3120 MemTxAttrs attrs, MemTxResult *result)
3122 return address_space_ldq_internal(as, addr, attrs, result,
3123 DEVICE_LITTLE_ENDIAN);
3126 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3127 MemTxAttrs attrs, MemTxResult *result)
3129 return address_space_ldq_internal(as, addr, attrs, result,
3130 DEVICE_BIG_ENDIAN);
3133 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3135 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3138 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3140 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3143 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3145 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3148 /* XXX: optimize */
3149 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3150 MemTxAttrs attrs, MemTxResult *result)
3152 uint8_t val;
3153 MemTxResult r;
3155 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3156 if (result) {
3157 *result = r;
3159 return val;
3162 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3164 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3167 /* warning: addr must be aligned */
3168 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3169 hwaddr addr,
3170 MemTxAttrs attrs,
3171 MemTxResult *result,
3172 enum device_endian endian)
3174 uint8_t *ptr;
3175 uint64_t val;
3176 MemoryRegion *mr;
3177 hwaddr l = 2;
3178 hwaddr addr1;
3179 MemTxResult r;
3180 bool release_lock = false;
3182 rcu_read_lock();
3183 mr = address_space_translate(as, addr, &addr1, &l,
3184 false);
3185 if (l < 2 || !memory_access_is_direct(mr, false)) {
3186 release_lock |= prepare_mmio_access(mr);
3188 /* I/O case */
3189 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3190 #if defined(TARGET_WORDS_BIGENDIAN)
3191 if (endian == DEVICE_LITTLE_ENDIAN) {
3192 val = bswap16(val);
3194 #else
3195 if (endian == DEVICE_BIG_ENDIAN) {
3196 val = bswap16(val);
3198 #endif
3199 } else {
3200 /* RAM case */
3201 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3202 & TARGET_PAGE_MASK)
3203 + addr1);
3204 switch (endian) {
3205 case DEVICE_LITTLE_ENDIAN:
3206 val = lduw_le_p(ptr);
3207 break;
3208 case DEVICE_BIG_ENDIAN:
3209 val = lduw_be_p(ptr);
3210 break;
3211 default:
3212 val = lduw_p(ptr);
3213 break;
3215 r = MEMTX_OK;
3217 if (result) {
3218 *result = r;
3220 if (release_lock) {
3221 qemu_mutex_unlock_iothread();
3223 rcu_read_unlock();
3224 return val;
3227 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3228 MemTxAttrs attrs, MemTxResult *result)
3230 return address_space_lduw_internal(as, addr, attrs, result,
3231 DEVICE_NATIVE_ENDIAN);
3234 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3235 MemTxAttrs attrs, MemTxResult *result)
3237 return address_space_lduw_internal(as, addr, attrs, result,
3238 DEVICE_LITTLE_ENDIAN);
3241 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3242 MemTxAttrs attrs, MemTxResult *result)
3244 return address_space_lduw_internal(as, addr, attrs, result,
3245 DEVICE_BIG_ENDIAN);
3248 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3250 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3253 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3255 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3258 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3260 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3263 /* warning: addr must be aligned. The ram page is not masked as dirty
3264 and the code inside is not invalidated. It is useful if the dirty
3265 bits are used to track modified PTEs */
3266 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3267 MemTxAttrs attrs, MemTxResult *result)
3269 uint8_t *ptr;
3270 MemoryRegion *mr;
3271 hwaddr l = 4;
3272 hwaddr addr1;
3273 MemTxResult r;
3274 uint8_t dirty_log_mask;
3275 bool release_lock = false;
3277 rcu_read_lock();
3278 mr = address_space_translate(as, addr, &addr1, &l,
3279 true);
3280 if (l < 4 || !memory_access_is_direct(mr, true)) {
3281 release_lock |= prepare_mmio_access(mr);
3283 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3284 } else {
3285 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3286 ptr = qemu_get_ram_ptr(addr1);
3287 stl_p(ptr, val);
3289 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3290 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3291 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3292 r = MEMTX_OK;
3294 if (result) {
3295 *result = r;
3297 if (release_lock) {
3298 qemu_mutex_unlock_iothread();
3300 rcu_read_unlock();
3303 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3305 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3308 /* warning: addr must be aligned */
3309 static inline void address_space_stl_internal(AddressSpace *as,
3310 hwaddr addr, uint32_t val,
3311 MemTxAttrs attrs,
3312 MemTxResult *result,
3313 enum device_endian endian)
3315 uint8_t *ptr;
3316 MemoryRegion *mr;
3317 hwaddr l = 4;
3318 hwaddr addr1;
3319 MemTxResult r;
3320 bool release_lock = false;
3322 rcu_read_lock();
3323 mr = address_space_translate(as, addr, &addr1, &l,
3324 true);
3325 if (l < 4 || !memory_access_is_direct(mr, true)) {
3326 release_lock |= prepare_mmio_access(mr);
3328 #if defined(TARGET_WORDS_BIGENDIAN)
3329 if (endian == DEVICE_LITTLE_ENDIAN) {
3330 val = bswap32(val);
3332 #else
3333 if (endian == DEVICE_BIG_ENDIAN) {
3334 val = bswap32(val);
3336 #endif
3337 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3338 } else {
3339 /* RAM case */
3340 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3341 ptr = qemu_get_ram_ptr(addr1);
3342 switch (endian) {
3343 case DEVICE_LITTLE_ENDIAN:
3344 stl_le_p(ptr, val);
3345 break;
3346 case DEVICE_BIG_ENDIAN:
3347 stl_be_p(ptr, val);
3348 break;
3349 default:
3350 stl_p(ptr, val);
3351 break;
3353 invalidate_and_set_dirty(mr, addr1, 4);
3354 r = MEMTX_OK;
3356 if (result) {
3357 *result = r;
3359 if (release_lock) {
3360 qemu_mutex_unlock_iothread();
3362 rcu_read_unlock();
3365 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3366 MemTxAttrs attrs, MemTxResult *result)
3368 address_space_stl_internal(as, addr, val, attrs, result,
3369 DEVICE_NATIVE_ENDIAN);
3372 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3373 MemTxAttrs attrs, MemTxResult *result)
3375 address_space_stl_internal(as, addr, val, attrs, result,
3376 DEVICE_LITTLE_ENDIAN);
3379 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3380 MemTxAttrs attrs, MemTxResult *result)
3382 address_space_stl_internal(as, addr, val, attrs, result,
3383 DEVICE_BIG_ENDIAN);
3386 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3388 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3391 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3393 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3396 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3398 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3401 /* XXX: optimize */
3402 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3403 MemTxAttrs attrs, MemTxResult *result)
3405 uint8_t v = val;
3406 MemTxResult r;
3408 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3409 if (result) {
3410 *result = r;
3414 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3416 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3419 /* warning: addr must be aligned */
3420 static inline void address_space_stw_internal(AddressSpace *as,
3421 hwaddr addr, uint32_t val,
3422 MemTxAttrs attrs,
3423 MemTxResult *result,
3424 enum device_endian endian)
3426 uint8_t *ptr;
3427 MemoryRegion *mr;
3428 hwaddr l = 2;
3429 hwaddr addr1;
3430 MemTxResult r;
3431 bool release_lock = false;
3433 rcu_read_lock();
3434 mr = address_space_translate(as, addr, &addr1, &l, true);
3435 if (l < 2 || !memory_access_is_direct(mr, true)) {
3436 release_lock |= prepare_mmio_access(mr);
3438 #if defined(TARGET_WORDS_BIGENDIAN)
3439 if (endian == DEVICE_LITTLE_ENDIAN) {
3440 val = bswap16(val);
3442 #else
3443 if (endian == DEVICE_BIG_ENDIAN) {
3444 val = bswap16(val);
3446 #endif
3447 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3448 } else {
3449 /* RAM case */
3450 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3451 ptr = qemu_get_ram_ptr(addr1);
3452 switch (endian) {
3453 case DEVICE_LITTLE_ENDIAN:
3454 stw_le_p(ptr, val);
3455 break;
3456 case DEVICE_BIG_ENDIAN:
3457 stw_be_p(ptr, val);
3458 break;
3459 default:
3460 stw_p(ptr, val);
3461 break;
3463 invalidate_and_set_dirty(mr, addr1, 2);
3464 r = MEMTX_OK;
3466 if (result) {
3467 *result = r;
3469 if (release_lock) {
3470 qemu_mutex_unlock_iothread();
3472 rcu_read_unlock();
3475 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3476 MemTxAttrs attrs, MemTxResult *result)
3478 address_space_stw_internal(as, addr, val, attrs, result,
3479 DEVICE_NATIVE_ENDIAN);
3482 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3483 MemTxAttrs attrs, MemTxResult *result)
3485 address_space_stw_internal(as, addr, val, attrs, result,
3486 DEVICE_LITTLE_ENDIAN);
3489 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3490 MemTxAttrs attrs, MemTxResult *result)
3492 address_space_stw_internal(as, addr, val, attrs, result,
3493 DEVICE_BIG_ENDIAN);
3496 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3498 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3501 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3503 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3506 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3508 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3511 /* XXX: optimize */
3512 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3513 MemTxAttrs attrs, MemTxResult *result)
3515 MemTxResult r;
3516 val = tswap64(val);
3517 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3518 if (result) {
3519 *result = r;
3523 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3524 MemTxAttrs attrs, MemTxResult *result)
3526 MemTxResult r;
3527 val = cpu_to_le64(val);
3528 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3529 if (result) {
3530 *result = r;
3533 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3534 MemTxAttrs attrs, MemTxResult *result)
3536 MemTxResult r;
3537 val = cpu_to_be64(val);
3538 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3539 if (result) {
3540 *result = r;
3544 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3546 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3549 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3551 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3554 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3556 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3559 /* virtual memory access for debug (includes writing to ROM) */
3560 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3561 uint8_t *buf, int len, int is_write)
3563 int l;
3564 hwaddr phys_addr;
3565 target_ulong page;
3567 while (len > 0) {
3568 page = addr & TARGET_PAGE_MASK;
3569 phys_addr = cpu_get_phys_page_debug(cpu, page);
3570 /* if no physical page mapped, return an error */
3571 if (phys_addr == -1)
3572 return -1;
3573 l = (page + TARGET_PAGE_SIZE) - addr;
3574 if (l > len)
3575 l = len;
3576 phys_addr += (addr & ~TARGET_PAGE_MASK);
3577 if (is_write) {
3578 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3579 } else {
3580 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3581 buf, l, 0);
3583 len -= l;
3584 buf += l;
3585 addr += l;
3587 return 0;
3591 * Allows code that needs to deal with migration bitmaps etc to still be built
3592 * target independent.
3594 size_t qemu_target_page_bits(void)
3596 return TARGET_PAGE_BITS;
3599 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3615 #ifndef CONFIG_USER_ONLY
3616 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3618 MemoryRegion*mr;
3619 hwaddr l = 1;
3620 bool res;
3622 rcu_read_lock();
3623 mr = address_space_translate(&address_space_memory,
3624 phys_addr, &phys_addr, &l, false);
3626 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3627 rcu_read_unlock();
3628 return res;
3631 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3633 RAMBlock *block;
3634 int ret = 0;
3636 rcu_read_lock();
3637 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3638 ret = func(block->idstr, block->host, block->offset,
3639 block->used_length, opaque);
3640 if (ret) {
3641 break;
3644 rcu_read_unlock();
3645 return ret;
3647 #endif