virtio-rng: ask for more data if queue is not fully drained
[qemu/kevin.git] / exec.c
blobc62c43903c166e0ed0b84805784fe7d934fb0edf
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #ifndef _WIN32
21 #include <sys/mman.h>
22 #endif
24 #include "qemu-common.h"
25 #include "cpu.h"
26 #include "tcg.h"
27 #include "hw/hw.h"
28 #if !defined(CONFIG_USER_ONLY)
29 #include "hw/boards.h"
30 #endif
31 #include "hw/qdev.h"
32 #include "sysemu/kvm.h"
33 #include "sysemu/sysemu.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48 #include "qemu/rcu_queue.h"
49 #include "qemu/main-loop.h"
50 #include "translate-all.h"
51 #include "sysemu/replay.h"
53 #include "exec/memory-internal.h"
54 #include "exec/ram_addr.h"
55 #include "exec/log.h"
57 #include "qemu/range.h"
58 #ifndef _WIN32
59 #include "qemu/mmap-alloc.h"
60 #endif
62 //#define DEBUG_SUBPAGE
64 #if !defined(CONFIG_USER_ONLY)
65 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
66 * are protected by the ramlist lock.
68 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
70 static MemoryRegion *system_memory;
71 static MemoryRegion *system_io;
73 AddressSpace address_space_io;
74 AddressSpace address_space_memory;
76 MemoryRegion io_mem_rom, io_mem_notdirty;
77 static MemoryRegion io_mem_unassigned;
79 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
80 #define RAM_PREALLOC (1 << 0)
82 /* RAM is mmap-ed with MAP_SHARED */
83 #define RAM_SHARED (1 << 1)
85 /* Only a portion of RAM (used_length) is actually used, and migrated.
86 * This used_length size can change across reboots.
88 #define RAM_RESIZEABLE (1 << 2)
90 #endif
92 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
93 /* current CPU in the current thread. It is only valid inside
94 cpu_exec() */
95 __thread CPUState *current_cpu;
96 /* 0 = Do not count executed instructions.
97 1 = Precise instruction counting.
98 2 = Adaptive rate instruction counting. */
99 int use_icount;
101 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One 32-bit slot of the multi-level physical page map. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of
     * L2_SIZE); zero marks a leaf entry.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
112 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
114 /* Size of the L2 (and L3, etc) page tables. */
115 #define ADDR_SPACE_BITS 64
117 #define P_L2_BITS 9
118 #define P_L2_SIZE (1 << P_L2_BITS)
120 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
122 typedef PhysPageEntry Node[P_L2_SIZE];
124 typedef struct PhysPageMap {
125 struct rcu_head rcu;
127 unsigned sections_nb;
128 unsigned sections_nb_alloc;
129 unsigned nodes_nb;
130 unsigned nodes_nb_alloc;
131 Node *nodes;
132 MemoryRegionSection *sections;
133 } PhysPageMap;
135 struct AddressSpaceDispatch {
136 struct rcu_head rcu;
138 /* This is a multi-level map on the physical address space.
139 * The bottom level has pointers to MemoryRegionSections.
141 PhysPageEntry phys_map;
142 PhysPageMap map;
143 AddressSpace *as;
146 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
147 typedef struct subpage_t {
148 MemoryRegion iomem;
149 AddressSpace *as;
150 hwaddr base;
151 uint16_t sub_section[TARGET_PAGE_SIZE];
152 } subpage_t;
154 #define PHYS_SECTION_UNASSIGNED 0
155 #define PHYS_SECTION_NOTDIRTY 1
156 #define PHYS_SECTION_ROM 2
157 #define PHYS_SECTION_WATCH 3
159 static void io_mem_init(void);
160 static void memory_map_init(void);
161 static void tcg_commit(MemoryListener *listener);
163 static MemoryRegion io_mem_watch;
166 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
167 * @cpu: the CPU whose AddressSpace this is
168 * @as: the AddressSpace itself
169 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
170 * @tcg_as_listener: listener for tracking changes to the AddressSpace
172 struct CPUAddressSpace {
173 CPUState *cpu;
174 AddressSpace *as;
175 struct AddressSpaceDispatch *memory_dispatch;
176 MemoryListener tcg_as_listener;
179 #endif
181 #if !defined(CONFIG_USER_ONLY)
183 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
185 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
186 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
188 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
192 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
194 unsigned i;
195 uint32_t ret;
196 PhysPageEntry e;
197 PhysPageEntry *p;
199 ret = map->nodes_nb++;
200 p = map->nodes[ret];
201 assert(ret != PHYS_MAP_NODE_NIL);
202 assert(ret != map->nodes_nb_alloc);
204 e.skip = leaf ? 0 : 1;
205 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
206 for (i = 0; i < P_L2_SIZE; ++i) {
207 memcpy(&p[i], &e, sizeof(e));
209 return ret;
212 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
213 hwaddr *index, hwaddr *nb, uint16_t leaf,
214 int level)
216 PhysPageEntry *p;
217 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
219 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
220 lp->ptr = phys_map_node_alloc(map, level == 0);
222 p = map->nodes[lp->ptr];
223 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
225 while (*nb && lp < &p[P_L2_SIZE]) {
226 if ((*index & (step - 1)) == 0 && *nb >= step) {
227 lp->skip = 0;
228 lp->ptr = leaf;
229 *index += step;
230 *nb -= step;
231 } else {
232 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
234 ++lp;
238 static void phys_page_set(AddressSpaceDispatch *d,
239 hwaddr index, hwaddr nb,
240 uint16_t leaf)
242 /* Wildly overreserve - it doesn't matter much. */
243 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
245 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
248 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
249 * and update our entry so we can skip it and go directly to the destination.
251 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
253 unsigned valid_ptr = P_L2_SIZE;
254 int valid = 0;
255 PhysPageEntry *p;
256 int i;
258 if (lp->ptr == PHYS_MAP_NODE_NIL) {
259 return;
262 p = nodes[lp->ptr];
263 for (i = 0; i < P_L2_SIZE; i++) {
264 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
265 continue;
268 valid_ptr = i;
269 valid++;
270 if (p[i].skip) {
271 phys_page_compact(&p[i], nodes, compacted);
275 /* We can only compress if there's only one child. */
276 if (valid != 1) {
277 return;
280 assert(valid_ptr < P_L2_SIZE);
282 /* Don't compress if it won't fit in the # of bits we have. */
283 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
284 return;
287 lp->ptr = p[valid_ptr].ptr;
288 if (!p[valid_ptr].skip) {
289 /* If our only child is a leaf, make this a leaf. */
290 /* By design, we should have made this node a leaf to begin with so we
291 * should never reach here.
292 * But since it's so simple to handle this, let's do it just in case we
293 * change this rule.
295 lp->skip = 0;
296 } else {
297 lp->skip += p[valid_ptr].skip;
301 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
303 DECLARE_BITMAP(compacted, nodes_nb);
305 if (d->phys_map.skip) {
306 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
310 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
311 Node *nodes, MemoryRegionSection *sections)
313 PhysPageEntry *p;
314 hwaddr index = addr >> TARGET_PAGE_BITS;
315 int i;
317 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
318 if (lp.ptr == PHYS_MAP_NODE_NIL) {
319 return &sections[PHYS_SECTION_UNASSIGNED];
321 p = nodes[lp.ptr];
322 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
325 if (sections[lp.ptr].size.hi ||
326 range_covers_byte(sections[lp.ptr].offset_within_address_space,
327 sections[lp.ptr].size.lo, addr)) {
328 return &sections[lp.ptr];
329 } else {
330 return &sections[PHYS_SECTION_UNASSIGNED];
334 bool memory_region_is_unassigned(MemoryRegion *mr)
336 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
337 && mr != &io_mem_watch;
340 /* Called from RCU critical section */
341 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
342 hwaddr addr,
343 bool resolve_subpage)
345 MemoryRegionSection *section;
346 subpage_t *subpage;
348 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
349 if (resolve_subpage && section->mr->subpage) {
350 subpage = container_of(section->mr, subpage_t, iomem);
351 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
353 return section;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *
358 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
359 hwaddr *plen, bool resolve_subpage)
361 MemoryRegionSection *section;
362 MemoryRegion *mr;
363 Int128 diff;
365 section = address_space_lookup_region(d, addr, resolve_subpage);
366 /* Compute offset within MemoryRegionSection */
367 addr -= section->offset_within_address_space;
369 /* Compute offset within MemoryRegion */
370 *xlat = addr + section->offset_within_region;
372 mr = section->mr;
374 /* MMIO registers can be expected to perform full-width accesses based only
375 * on their address, without considering adjacent registers that could
376 * decode to completely different MemoryRegions. When such registers
377 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
378 * regions overlap wildly. For this reason we cannot clamp the accesses
379 * here.
381 * If the length is small (as is the case for address_space_ldl/stl),
382 * everything works fine. If the incoming length is large, however,
383 * the caller really has to do the clamping through memory_access_size.
385 if (memory_region_is_ram(mr)) {
386 diff = int128_sub(section->size, int128_make64(addr));
387 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
389 return section;
392 /* Called from RCU critical section */
393 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
394 hwaddr *xlat, hwaddr *plen,
395 bool is_write)
397 IOMMUTLBEntry iotlb;
398 MemoryRegionSection *section;
399 MemoryRegion *mr;
401 for (;;) {
402 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
403 section = address_space_translate_internal(d, addr, &addr, plen, true);
404 mr = section->mr;
406 if (!mr->iommu_ops) {
407 break;
410 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
411 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
412 | (addr & iotlb.addr_mask));
413 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
414 if (!(iotlb.perm & (1 << is_write))) {
415 mr = &io_mem_unassigned;
416 break;
419 as = iotlb.target_as;
422 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
423 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
424 *plen = MIN(page, *plen);
427 *xlat = addr;
428 return mr;
431 /* Called from RCU critical section */
432 MemoryRegionSection *
433 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
434 hwaddr *xlat, hwaddr *plen)
436 MemoryRegionSection *section;
437 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
439 section = address_space_translate_internal(d, addr, xlat, plen, false);
441 assert(!section->mr->iommu_ops);
442 return section;
444 #endif
446 #if !defined(CONFIG_USER_ONLY)
448 static int cpu_common_post_load(void *opaque, int version_id)
450 CPUState *cpu = opaque;
452 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
453 version_id is increased. */
454 cpu->interrupt_request &= ~0x01;
455 tlb_flush(cpu, 1);
457 return 0;
460 static int cpu_common_pre_load(void *opaque)
462 CPUState *cpu = opaque;
464 cpu->exception_index = -1;
466 return 0;
469 static bool cpu_common_exception_index_needed(void *opaque)
471 CPUState *cpu = opaque;
473 return tcg_enabled() && cpu->exception_index != -1;
476 static const VMStateDescription vmstate_cpu_common_exception_index = {
477 .name = "cpu_common/exception_index",
478 .version_id = 1,
479 .minimum_version_id = 1,
480 .needed = cpu_common_exception_index_needed,
481 .fields = (VMStateField[]) {
482 VMSTATE_INT32(exception_index, CPUState),
483 VMSTATE_END_OF_LIST()
487 static bool cpu_common_crash_occurred_needed(void *opaque)
489 CPUState *cpu = opaque;
491 return cpu->crash_occurred;
494 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
495 .name = "cpu_common/crash_occurred",
496 .version_id = 1,
497 .minimum_version_id = 1,
498 .needed = cpu_common_crash_occurred_needed,
499 .fields = (VMStateField[]) {
500 VMSTATE_BOOL(crash_occurred, CPUState),
501 VMSTATE_END_OF_LIST()
505 const VMStateDescription vmstate_cpu_common = {
506 .name = "cpu_common",
507 .version_id = 1,
508 .minimum_version_id = 1,
509 .pre_load = cpu_common_pre_load,
510 .post_load = cpu_common_post_load,
511 .fields = (VMStateField[]) {
512 VMSTATE_UINT32(halted, CPUState),
513 VMSTATE_UINT32(interrupt_request, CPUState),
514 VMSTATE_END_OF_LIST()
516 .subsections = (const VMStateDescription*[]) {
517 &vmstate_cpu_common_exception_index,
518 &vmstate_cpu_common_crash_occurred,
519 NULL
523 #endif
525 CPUState *qemu_get_cpu(int index)
527 CPUState *cpu;
529 CPU_FOREACH(cpu) {
530 if (cpu->cpu_index == index) {
531 return cpu;
535 return NULL;
538 #if !defined(CONFIG_USER_ONLY)
539 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
541 CPUAddressSpace *newas;
543 /* Target code should have set num_ases before calling us */
544 assert(asidx < cpu->num_ases);
546 if (asidx == 0) {
547 /* address space 0 gets the convenience alias */
548 cpu->as = as;
551 /* KVM cannot currently support multiple address spaces. */
552 assert(asidx == 0 || !kvm_enabled());
554 if (!cpu->cpu_ases) {
555 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
558 newas = &cpu->cpu_ases[asidx];
559 newas->cpu = cpu;
560 newas->as = as;
561 if (tcg_enabled()) {
562 newas->tcg_as_listener.commit = tcg_commit;
563 memory_listener_register(&newas->tcg_as_listener, as);
567 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
569 /* Return the AddressSpace corresponding to the specified index */
570 return cpu->cpu_ases[asidx].as;
572 #endif
574 #ifndef CONFIG_USER_ONLY
575 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
577 static int cpu_get_free_index(Error **errp)
579 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
581 if (cpu >= MAX_CPUMASK_BITS) {
582 error_setg(errp, "Trying to use more CPUs than max of %d",
583 MAX_CPUMASK_BITS);
584 return -1;
587 bitmap_set(cpu_index_map, cpu, 1);
588 return cpu;
591 void cpu_exec_exit(CPUState *cpu)
593 if (cpu->cpu_index == -1) {
594 /* cpu_index was never allocated by this @cpu or was already freed. */
595 return;
598 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
599 cpu->cpu_index = -1;
601 #else
603 static int cpu_get_free_index(Error **errp)
605 CPUState *some_cpu;
606 int cpu_index = 0;
608 CPU_FOREACH(some_cpu) {
609 cpu_index++;
611 return cpu_index;
614 void cpu_exec_exit(CPUState *cpu)
617 #endif
619 void cpu_exec_init(CPUState *cpu, Error **errp)
621 CPUClass *cc = CPU_GET_CLASS(cpu);
622 int cpu_index;
623 Error *local_err = NULL;
625 cpu->as = NULL;
626 cpu->num_ases = 0;
628 #ifndef CONFIG_USER_ONLY
629 cpu->thread_id = qemu_get_thread_id();
631 /* This is a softmmu CPU object, so create a property for it
632 * so users can wire up its memory. (This can't go in qom/cpu.c
633 * because that file is compiled only once for both user-mode
634 * and system builds.) The default if no link is set up is to use
635 * the system address space.
637 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
638 (Object **)&cpu->memory,
639 qdev_prop_allow_set_link_before_realize,
640 OBJ_PROP_LINK_UNREF_ON_RELEASE,
641 &error_abort);
642 cpu->memory = system_memory;
643 object_ref(OBJECT(cpu->memory));
644 #endif
646 #if defined(CONFIG_USER_ONLY)
647 cpu_list_lock();
648 #endif
649 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
650 if (local_err) {
651 error_propagate(errp, local_err);
652 #if defined(CONFIG_USER_ONLY)
653 cpu_list_unlock();
654 #endif
655 return;
657 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
658 #if defined(CONFIG_USER_ONLY)
659 cpu_list_unlock();
660 #endif
661 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
662 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
664 if (cc->vmsd != NULL) {
665 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
669 #if defined(CONFIG_USER_ONLY)
670 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
672 tb_invalidate_phys_page_range(pc, pc + 1, 0);
674 #else
675 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
677 MemTxAttrs attrs;
678 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
679 int asidx = cpu_asidx_from_attrs(cpu, attrs);
680 if (phys != -1) {
681 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
682 phys | (pc & ~TARGET_PAGE_MASK));
685 #endif
687 #if defined(CONFIG_USER_ONLY)
688 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
693 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
694 int flags)
696 return -ENOSYS;
699 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
703 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
704 int flags, CPUWatchpoint **watchpoint)
706 return -ENOSYS;
708 #else
709 /* Add a watchpoint. */
710 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
711 int flags, CPUWatchpoint **watchpoint)
713 CPUWatchpoint *wp;
715 /* forbid ranges which are empty or run off the end of the address space */
716 if (len == 0 || (addr + len - 1) < addr) {
717 error_report("tried to set invalid watchpoint at %"
718 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
719 return -EINVAL;
721 wp = g_malloc(sizeof(*wp));
723 wp->vaddr = addr;
724 wp->len = len;
725 wp->flags = flags;
727 /* keep all GDB-injected watchpoints in front */
728 if (flags & BP_GDB) {
729 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
730 } else {
731 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
734 tlb_flush_page(cpu, addr);
736 if (watchpoint)
737 *watchpoint = wp;
738 return 0;
741 /* Remove a specific watchpoint. */
742 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
743 int flags)
745 CPUWatchpoint *wp;
747 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
748 if (addr == wp->vaddr && len == wp->len
749 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
750 cpu_watchpoint_remove_by_ref(cpu, wp);
751 return 0;
754 return -ENOENT;
757 /* Remove a specific watchpoint by reference. */
758 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
760 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
762 tlb_flush_page(cpu, watchpoint->vaddr);
764 g_free(watchpoint);
767 /* Remove all matching watchpoints. */
768 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
770 CPUWatchpoint *wp, *next;
772 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
773 if (wp->flags & mask) {
774 cpu_watchpoint_remove_by_ref(cpu, wp);
779 /* Return true if this watchpoint address matches the specified
780 * access (ie the address range covered by the watchpoint overlaps
781 * partially or completely with the address range covered by the
782 * access).
784 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
785 vaddr addr,
786 vaddr len)
788 /* We know the lengths are non-zero, but a little caution is
789 * required to avoid errors in the case where the range ends
790 * exactly at the top of the address space and so addr + len
791 * wraps round to zero.
793 vaddr wpend = wp->vaddr + wp->len - 1;
794 vaddr addrend = addr + len - 1;
796 return !(addr > wpend || wp->vaddr > addrend);
799 #endif
801 /* Add a breakpoint. */
802 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
803 CPUBreakpoint **breakpoint)
805 CPUBreakpoint *bp;
807 bp = g_malloc(sizeof(*bp));
809 bp->pc = pc;
810 bp->flags = flags;
812 /* keep all GDB-injected breakpoints in front */
813 if (flags & BP_GDB) {
814 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
815 } else {
816 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
819 breakpoint_invalidate(cpu, pc);
821 if (breakpoint) {
822 *breakpoint = bp;
824 return 0;
827 /* Remove a specific breakpoint. */
828 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
830 CPUBreakpoint *bp;
832 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
833 if (bp->pc == pc && bp->flags == flags) {
834 cpu_breakpoint_remove_by_ref(cpu, bp);
835 return 0;
838 return -ENOENT;
841 /* Remove a specific breakpoint by reference. */
842 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
844 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
846 breakpoint_invalidate(cpu, breakpoint->pc);
848 g_free(breakpoint);
851 /* Remove all matching breakpoints. */
852 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
854 CPUBreakpoint *bp, *next;
856 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
857 if (bp->flags & mask) {
858 cpu_breakpoint_remove_by_ref(cpu, bp);
863 /* enable or disable single step mode. EXCP_DEBUG is returned by the
864 CPU loop after each instruction */
865 void cpu_single_step(CPUState *cpu, int enabled)
867 if (cpu->singlestep_enabled != enabled) {
868 cpu->singlestep_enabled = enabled;
869 if (kvm_enabled()) {
870 kvm_update_guest_debug(cpu, 0);
871 } else {
872 /* must flush all the translated code to avoid inconsistencies */
873 /* XXX: only flush what is necessary */
874 tb_flush(cpu);
879 void cpu_abort(CPUState *cpu, const char *fmt, ...)
881 va_list ap;
882 va_list ap2;
884 va_start(ap, fmt);
885 va_copy(ap2, ap);
886 fprintf(stderr, "qemu: fatal: ");
887 vfprintf(stderr, fmt, ap);
888 fprintf(stderr, "\n");
889 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
890 if (qemu_log_separate()) {
891 qemu_log("qemu: fatal: ");
892 qemu_log_vprintf(fmt, ap2);
893 qemu_log("\n");
894 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
895 qemu_log_flush();
896 qemu_log_close();
898 va_end(ap2);
899 va_end(ap);
900 replay_finish();
901 #if defined(CONFIG_USER_ONLY)
903 struct sigaction act;
904 sigfillset(&act.sa_mask);
905 act.sa_handler = SIG_DFL;
906 sigaction(SIGABRT, &act, NULL);
908 #endif
909 abort();
912 #if !defined(CONFIG_USER_ONLY)
913 /* Called from RCU critical section */
914 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
916 RAMBlock *block;
918 block = atomic_rcu_read(&ram_list.mru_block);
919 if (block && addr - block->offset < block->max_length) {
920 return block;
922 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
923 if (addr - block->offset < block->max_length) {
924 goto found;
928 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
929 abort();
931 found:
932 /* It is safe to write mru_block outside the iothread lock. This
933 * is what happens:
935 * mru_block = xxx
936 * rcu_read_unlock()
937 * xxx removed from list
938 * rcu_read_lock()
939 * read mru_block
940 * mru_block = NULL;
941 * call_rcu(reclaim_ramblock, xxx);
942 * rcu_read_unlock()
944 * atomic_rcu_set is not needed here. The block was already published
945 * when it was placed into the list. Here we're just making an extra
946 * copy of the pointer.
948 ram_list.mru_block = block;
949 return block;
952 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
954 CPUState *cpu;
955 ram_addr_t start1;
956 RAMBlock *block;
957 ram_addr_t end;
959 end = TARGET_PAGE_ALIGN(start + length);
960 start &= TARGET_PAGE_MASK;
962 rcu_read_lock();
963 block = qemu_get_ram_block(start);
964 assert(block == qemu_get_ram_block(end - 1));
965 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
966 CPU_FOREACH(cpu) {
967 tlb_reset_dirty(cpu, start1, length);
969 rcu_read_unlock();
972 /* Note: start and end must be within the same ram block. */
973 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
974 ram_addr_t length,
975 unsigned client)
977 DirtyMemoryBlocks *blocks;
978 unsigned long end, page;
979 bool dirty = false;
981 if (length == 0) {
982 return false;
985 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
986 page = start >> TARGET_PAGE_BITS;
988 rcu_read_lock();
990 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
992 while (page < end) {
993 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
994 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
995 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
997 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
998 offset, num);
999 page += num;
1002 rcu_read_unlock();
1004 if (dirty && tcg_enabled()) {
1005 tlb_reset_dirty_range_all(start, length);
1008 return dirty;
1011 /* Called from RCU critical section */
1012 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1013 MemoryRegionSection *section,
1014 target_ulong vaddr,
1015 hwaddr paddr, hwaddr xlat,
1016 int prot,
1017 target_ulong *address)
1019 hwaddr iotlb;
1020 CPUWatchpoint *wp;
1022 if (memory_region_is_ram(section->mr)) {
1023 /* Normal RAM. */
1024 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1025 + xlat;
1026 if (!section->readonly) {
1027 iotlb |= PHYS_SECTION_NOTDIRTY;
1028 } else {
1029 iotlb |= PHYS_SECTION_ROM;
1031 } else {
1032 AddressSpaceDispatch *d;
1034 d = atomic_rcu_read(&section->address_space->dispatch);
1035 iotlb = section - d->map.sections;
1036 iotlb += xlat;
1039 /* Make accesses to pages with watchpoints go via the
1040 watchpoint trap routines. */
1041 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1042 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1043 /* Avoid trapping reads of pages with a write breakpoint. */
1044 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1045 iotlb = PHYS_SECTION_WATCH + paddr;
1046 *address |= TLB_MMIO;
1047 break;
1052 return iotlb;
1054 #endif /* !defined(CONFIG_USER_ONLY) */
1056 #if !defined(CONFIG_USER_ONLY)
1058 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1059 uint16_t section);
1060 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1062 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1063 qemu_anon_ram_alloc;
1066 * Set a custom physical guest memory alloator.
1067 * Accelerators with unusual needs may need this. Hopefully, we can
1068 * get rid of it eventually.
1070 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1072 phys_mem_alloc = alloc;
1075 static uint16_t phys_section_add(PhysPageMap *map,
1076 MemoryRegionSection *section)
1078 /* The physical section number is ORed with a page-aligned
1079 * pointer to produce the iotlb entries. Thus it should
1080 * never overflow into the page-aligned value.
1082 assert(map->sections_nb < TARGET_PAGE_SIZE);
1084 if (map->sections_nb == map->sections_nb_alloc) {
1085 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1086 map->sections = g_renew(MemoryRegionSection, map->sections,
1087 map->sections_nb_alloc);
1089 map->sections[map->sections_nb] = *section;
1090 memory_region_ref(section->mr);
1091 return map->sections_nb++;
1094 static void phys_section_destroy(MemoryRegion *mr)
1096 bool have_sub_page = mr->subpage;
1098 memory_region_unref(mr);
1100 if (have_sub_page) {
1101 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1102 object_unref(OBJECT(&subpage->iomem));
1103 g_free(subpage);
1107 static void phys_sections_free(PhysPageMap *map)
1109 while (map->sections_nb > 0) {
1110 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1111 phys_section_destroy(section->mr);
1113 g_free(map->sections);
1114 g_free(map->nodes);
1117 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1119 subpage_t *subpage;
1120 hwaddr base = section->offset_within_address_space
1121 & TARGET_PAGE_MASK;
1122 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1123 d->map.nodes, d->map.sections);
1124 MemoryRegionSection subsection = {
1125 .offset_within_address_space = base,
1126 .size = int128_make64(TARGET_PAGE_SIZE),
1128 hwaddr start, end;
1130 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1132 if (!(existing->mr->subpage)) {
1133 subpage = subpage_init(d->as, base);
1134 subsection.address_space = d->as;
1135 subsection.mr = &subpage->iomem;
1136 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1137 phys_section_add(&d->map, &subsection));
1138 } else {
1139 subpage = container_of(existing->mr, subpage_t, iomem);
1141 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1142 end = start + int128_get64(section->size) - 1;
1143 subpage_register(subpage, start, end,
1144 phys_section_add(&d->map, section));
1148 static void register_multipage(AddressSpaceDispatch *d,
1149 MemoryRegionSection *section)
1151 hwaddr start_addr = section->offset_within_address_space;
1152 uint16_t section_index = phys_section_add(&d->map, section);
1153 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1154 TARGET_PAGE_BITS));
1156 assert(num_pages);
1157 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1160 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1162 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1163 AddressSpaceDispatch *d = as->next_dispatch;
1164 MemoryRegionSection now = *section, remain = *section;
1165 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1167 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1168 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1169 - now.offset_within_address_space;
1171 now.size = int128_min(int128_make64(left), now.size);
1172 register_subpage(d, &now);
1173 } else {
1174 now.size = int128_zero();
1176 while (int128_ne(remain.size, now.size)) {
1177 remain.size = int128_sub(remain.size, now.size);
1178 remain.offset_within_address_space += int128_get64(now.size);
1179 remain.offset_within_region += int128_get64(now.size);
1180 now = remain;
1181 if (int128_lt(remain.size, page_size)) {
1182 register_subpage(d, &now);
1183 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1184 now.size = page_size;
1185 register_subpage(d, &now);
1186 } else {
1187 now.size = int128_and(now.size, int128_neg(page_size));
1188 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; only does anything when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1199 void qemu_mutex_lock_ramlist(void)
1201 qemu_mutex_lock(&ram_list.mutex);
1204 void qemu_mutex_unlock_ramlist(void)
1206 qemu_mutex_unlock(&ram_list.mutex);
1209 #ifdef __linux__
1211 #include <sys/vfs.h>
1213 #define HUGETLBFS_MAGIC 0x958458f6
1215 static long gethugepagesize(const char *path, Error **errp)
1217 struct statfs fs;
1218 int ret;
1220 do {
1221 ret = statfs(path, &fs);
1222 } while (ret != 0 && errno == EINTR);
1224 if (ret != 0) {
1225 error_setg_errno(errp, errno, "failed to get page size of file %s",
1226 path);
1227 return 0;
1230 return fs.f_bsize;
1233 static void *file_ram_alloc(RAMBlock *block,
1234 ram_addr_t memory,
1235 const char *path,
1236 Error **errp)
1238 struct stat st;
1239 char *filename;
1240 char *sanitized_name;
1241 char *c;
1242 void *area;
1243 int fd;
1244 uint64_t hpagesize;
1245 Error *local_err = NULL;
1247 hpagesize = gethugepagesize(path, &local_err);
1248 if (local_err) {
1249 error_propagate(errp, local_err);
1250 goto error;
1252 block->mr->align = hpagesize;
1254 if (memory < hpagesize) {
1255 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1256 "or larger than huge page size 0x%" PRIx64,
1257 memory, hpagesize);
1258 goto error;
1261 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1262 error_setg(errp,
1263 "host lacks kvm mmu notifiers, -mem-path unsupported");
1264 goto error;
1267 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1268 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1269 sanitized_name = g_strdup(memory_region_name(block->mr));
1270 for (c = sanitized_name; *c != '\0'; c++) {
1271 if (*c == '/') {
1272 *c = '_';
1276 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1277 sanitized_name);
1278 g_free(sanitized_name);
1280 fd = mkstemp(filename);
1281 if (fd >= 0) {
1282 unlink(filename);
1284 g_free(filename);
1285 } else {
1286 fd = open(path, O_RDWR | O_CREAT, 0644);
1289 if (fd < 0) {
1290 error_setg_errno(errp, errno,
1291 "unable to create backing store for hugepages");
1292 goto error;
1295 memory = ROUND_UP(memory, hpagesize);
1298 * ftruncate is not supported by hugetlbfs in older
1299 * hosts, so don't bother bailing out on errors.
1300 * If anything goes wrong with it under other filesystems,
1301 * mmap will fail.
1303 if (ftruncate(fd, memory)) {
1304 perror("ftruncate");
1307 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1308 if (area == MAP_FAILED) {
1309 error_setg_errno(errp, errno,
1310 "unable to map backing store for hugepages");
1311 close(fd);
1312 goto error;
1315 if (mem_prealloc) {
1316 os_mem_prealloc(fd, area, memory);
1319 block->fd = fd;
1320 return area;
1322 error:
1323 return NULL;
1325 #endif
1327 /* Called with the ramlist lock held. */
1328 static ram_addr_t find_ram_offset(ram_addr_t size)
1330 RAMBlock *block, *next_block;
1331 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1333 assert(size != 0); /* it would hand out same offset multiple times */
1335 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1336 return 0;
1339 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1340 ram_addr_t end, next = RAM_ADDR_MAX;
1342 end = block->offset + block->max_length;
1344 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1345 if (next_block->offset >= end) {
1346 next = MIN(next, next_block->offset);
1349 if (next - end >= size && next - end < mingap) {
1350 offset = end;
1351 mingap = next - end;
1355 if (offset == RAM_ADDR_MAX) {
1356 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1357 (uint64_t)size);
1358 abort();
1361 return offset;
1364 ram_addr_t last_ram_offset(void)
1366 RAMBlock *block;
1367 ram_addr_t last = 0;
1369 rcu_read_lock();
1370 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1371 last = MAX(last, block->offset + block->max_length);
1373 rcu_read_unlock();
1374 return last;
1377 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1379 int ret;
1381 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1382 if (!machine_dump_guest_core(current_machine)) {
1383 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1384 if (ret) {
1385 perror("qemu_madvise");
1386 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1387 "but dump_guest_core=off specified\n");
1392 /* Called within an RCU critical section, or while the ramlist lock
1393 * is held.
1395 static RAMBlock *find_ram_block(ram_addr_t addr)
1397 RAMBlock *block;
1399 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1400 if (block->offset == addr) {
1401 return block;
1405 return NULL;
1408 const char *qemu_ram_get_idstr(RAMBlock *rb)
1410 return rb->idstr;
1413 /* Called with iothread lock held. */
1414 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1416 RAMBlock *new_block, *block;
1418 rcu_read_lock();
1419 new_block = find_ram_block(addr);
1420 assert(new_block);
1421 assert(!new_block->idstr[0]);
1423 if (dev) {
1424 char *id = qdev_get_dev_path(dev);
1425 if (id) {
1426 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1427 g_free(id);
1430 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1432 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1433 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1434 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1435 new_block->idstr);
1436 abort();
1439 rcu_read_unlock();
1442 /* Called with iothread lock held. */
1443 void qemu_ram_unset_idstr(ram_addr_t addr)
1445 RAMBlock *block;
1447 /* FIXME: arch_init.c assumes that this is not called throughout
1448 * migration. Ignore the problem since hot-unplug during migration
1449 * does not work anyway.
1452 rcu_read_lock();
1453 block = find_ram_block(addr);
1454 if (block) {
1455 memset(block->idstr, 0, sizeof(block->idstr));
1457 rcu_read_unlock();
1460 static int memory_try_enable_merging(void *addr, size_t len)
1462 if (!machine_mem_merge(current_machine)) {
1463 /* disabled by the user */
1464 return 0;
1467 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1470 /* Only legal before guest might have detected the memory size: e.g. on
1471 * incoming migration, or right after reset.
1473 * As memory core doesn't know how is memory accessed, it is up to
1474 * resize callback to update device state and/or add assertions to detect
1475 * misuse, if necessary.
1477 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1479 RAMBlock *block = find_ram_block(base);
1481 assert(block);
1483 newsize = HOST_PAGE_ALIGN(newsize);
1485 if (block->used_length == newsize) {
1486 return 0;
1489 if (!(block->flags & RAM_RESIZEABLE)) {
1490 error_setg_errno(errp, EINVAL,
1491 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1492 " in != 0x" RAM_ADDR_FMT, block->idstr,
1493 newsize, block->used_length);
1494 return -EINVAL;
1497 if (block->max_length < newsize) {
1498 error_setg_errno(errp, EINVAL,
1499 "Length too large: %s: 0x" RAM_ADDR_FMT
1500 " > 0x" RAM_ADDR_FMT, block->idstr,
1501 newsize, block->max_length);
1502 return -EINVAL;
1505 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1506 block->used_length = newsize;
1507 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1508 DIRTY_CLIENTS_ALL);
1509 memory_region_set_size(block->mr, newsize);
1510 if (block->resized) {
1511 block->resized(block->idstr, newsize, block->host);
1513 return 0;
1516 /* Called with ram_list.mutex held */
1517 static void dirty_memory_extend(ram_addr_t old_ram_size,
1518 ram_addr_t new_ram_size)
1520 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1521 DIRTY_MEMORY_BLOCK_SIZE);
1522 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1523 DIRTY_MEMORY_BLOCK_SIZE);
1524 int i;
1526 /* Only need to extend if block count increased */
1527 if (new_num_blocks <= old_num_blocks) {
1528 return;
1531 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1532 DirtyMemoryBlocks *old_blocks;
1533 DirtyMemoryBlocks *new_blocks;
1534 int j;
1536 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1537 new_blocks = g_malloc(sizeof(*new_blocks) +
1538 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1540 if (old_num_blocks) {
1541 memcpy(new_blocks->blocks, old_blocks->blocks,
1542 old_num_blocks * sizeof(old_blocks->blocks[0]));
1545 for (j = old_num_blocks; j < new_num_blocks; j++) {
1546 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1549 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1551 if (old_blocks) {
1552 g_free_rcu(old_blocks, rcu);
1557 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1559 RAMBlock *block;
1560 RAMBlock *last_block = NULL;
1561 ram_addr_t old_ram_size, new_ram_size;
1562 Error *err = NULL;
1564 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1566 qemu_mutex_lock_ramlist();
1567 new_block->offset = find_ram_offset(new_block->max_length);
1569 if (!new_block->host) {
1570 if (xen_enabled()) {
1571 xen_ram_alloc(new_block->offset, new_block->max_length,
1572 new_block->mr, &err);
1573 if (err) {
1574 error_propagate(errp, err);
1575 qemu_mutex_unlock_ramlist();
1576 return -1;
1578 } else {
1579 new_block->host = phys_mem_alloc(new_block->max_length,
1580 &new_block->mr->align);
1581 if (!new_block->host) {
1582 error_setg_errno(errp, errno,
1583 "cannot set up guest memory '%s'",
1584 memory_region_name(new_block->mr));
1585 qemu_mutex_unlock_ramlist();
1586 return -1;
1588 memory_try_enable_merging(new_block->host, new_block->max_length);
1592 new_ram_size = MAX(old_ram_size,
1593 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1594 if (new_ram_size > old_ram_size) {
1595 migration_bitmap_extend(old_ram_size, new_ram_size);
1596 dirty_memory_extend(old_ram_size, new_ram_size);
1598 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1599 * QLIST (which has an RCU-friendly variant) does not have insertion at
1600 * tail, so save the last element in last_block.
1602 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1603 last_block = block;
1604 if (block->max_length < new_block->max_length) {
1605 break;
1608 if (block) {
1609 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1610 } else if (last_block) {
1611 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1612 } else { /* list is empty */
1613 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1615 ram_list.mru_block = NULL;
1617 /* Write list before version */
1618 smp_wmb();
1619 ram_list.version++;
1620 qemu_mutex_unlock_ramlist();
1622 cpu_physical_memory_set_dirty_range(new_block->offset,
1623 new_block->used_length,
1624 DIRTY_CLIENTS_ALL);
1626 if (new_block->host) {
1627 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1628 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1629 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1630 if (kvm_enabled()) {
1631 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1635 return new_block->offset;
1638 #ifdef __linux__
1639 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1640 bool share, const char *mem_path,
1641 Error **errp)
1643 RAMBlock *new_block;
1644 ram_addr_t addr;
1645 Error *local_err = NULL;
1647 if (xen_enabled()) {
1648 error_setg(errp, "-mem-path not supported with Xen");
1649 return -1;
1652 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1654 * file_ram_alloc() needs to allocate just like
1655 * phys_mem_alloc, but we haven't bothered to provide
1656 * a hook there.
1658 error_setg(errp,
1659 "-mem-path not supported with this accelerator");
1660 return -1;
1663 size = HOST_PAGE_ALIGN(size);
1664 new_block = g_malloc0(sizeof(*new_block));
1665 new_block->mr = mr;
1666 new_block->used_length = size;
1667 new_block->max_length = size;
1668 new_block->flags = share ? RAM_SHARED : 0;
1669 new_block->host = file_ram_alloc(new_block, size,
1670 mem_path, errp);
1671 if (!new_block->host) {
1672 g_free(new_block);
1673 return -1;
1676 addr = ram_block_add(new_block, &local_err);
1677 if (local_err) {
1678 g_free(new_block);
1679 error_propagate(errp, local_err);
1680 return -1;
1682 return addr;
1684 #endif
1686 static
1687 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1688 void (*resized)(const char*,
1689 uint64_t length,
1690 void *host),
1691 void *host, bool resizeable,
1692 MemoryRegion *mr, Error **errp)
1694 RAMBlock *new_block;
1695 ram_addr_t addr;
1696 Error *local_err = NULL;
1698 size = HOST_PAGE_ALIGN(size);
1699 max_size = HOST_PAGE_ALIGN(max_size);
1700 new_block = g_malloc0(sizeof(*new_block));
1701 new_block->mr = mr;
1702 new_block->resized = resized;
1703 new_block->used_length = size;
1704 new_block->max_length = max_size;
1705 assert(max_size >= size);
1706 new_block->fd = -1;
1707 new_block->host = host;
1708 if (host) {
1709 new_block->flags |= RAM_PREALLOC;
1711 if (resizeable) {
1712 new_block->flags |= RAM_RESIZEABLE;
1714 addr = ram_block_add(new_block, &local_err);
1715 if (local_err) {
1716 g_free(new_block);
1717 error_propagate(errp, local_err);
1718 return -1;
1721 mr->ram_block = new_block;
1722 return addr;
1725 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1726 MemoryRegion *mr, Error **errp)
1728 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1731 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1733 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1736 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1737 void (*resized)(const char*,
1738 uint64_t length,
1739 void *host),
1740 MemoryRegion *mr, Error **errp)
1742 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1745 static void reclaim_ramblock(RAMBlock *block)
1747 if (block->flags & RAM_PREALLOC) {
1749 } else if (xen_enabled()) {
1750 xen_invalidate_map_cache_entry(block->host);
1751 #ifndef _WIN32
1752 } else if (block->fd >= 0) {
1753 qemu_ram_munmap(block->host, block->max_length);
1754 close(block->fd);
1755 #endif
1756 } else {
1757 qemu_anon_ram_free(block->host, block->max_length);
1759 g_free(block);
1762 void qemu_ram_free(ram_addr_t addr)
1764 RAMBlock *block;
1766 qemu_mutex_lock_ramlist();
1767 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1768 if (addr == block->offset) {
1769 QLIST_REMOVE_RCU(block, next);
1770 ram_list.mru_block = NULL;
1771 /* Write list before version */
1772 smp_wmb();
1773 ram_list.version++;
1774 call_rcu(block, reclaim_ramblock, rcu);
1775 break;
1778 qemu_mutex_unlock_ramlist();
1781 #ifndef _WIN32
1782 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1784 RAMBlock *block;
1785 ram_addr_t offset;
1786 int flags;
1787 void *area, *vaddr;
1789 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1790 offset = addr - block->offset;
1791 if (offset < block->max_length) {
1792 vaddr = ramblock_ptr(block, offset);
1793 if (block->flags & RAM_PREALLOC) {
1795 } else if (xen_enabled()) {
1796 abort();
1797 } else {
1798 flags = MAP_FIXED;
1799 if (block->fd >= 0) {
1800 flags |= (block->flags & RAM_SHARED ?
1801 MAP_SHARED : MAP_PRIVATE);
1802 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1803 flags, block->fd, offset);
1804 } else {
1806 * Remap needs to match alloc. Accelerators that
1807 * set phys_mem_alloc never remap. If they did,
1808 * we'd need a remap hook here.
1810 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1812 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1813 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1814 flags, -1, 0);
1816 if (area != vaddr) {
1817 fprintf(stderr, "Could not remap addr: "
1818 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1819 length, addr);
1820 exit(1);
1822 memory_try_enable_merging(vaddr, length);
1823 qemu_ram_setup_dump(vaddr, length);
1828 #endif /* !_WIN32 */
1830 int qemu_get_ram_fd(ram_addr_t addr)
1832 RAMBlock *block;
1833 int fd;
1835 rcu_read_lock();
1836 block = qemu_get_ram_block(addr);
1837 fd = block->fd;
1838 rcu_read_unlock();
1839 return fd;
1842 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1844 RAMBlock *block;
1846 rcu_read_lock();
1847 block = qemu_get_ram_block(addr);
1848 block->fd = fd;
1849 rcu_read_unlock();
1852 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1854 RAMBlock *block;
1855 void *ptr;
1857 rcu_read_lock();
1858 block = qemu_get_ram_block(addr);
1859 ptr = ramblock_ptr(block, 0);
1860 rcu_read_unlock();
1861 return ptr;
1864 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1865 * This should not be used for general purpose DMA. Use address_space_map
1866 * or address_space_rw instead. For local memory (e.g. video ram) that the
1867 * device owns, use memory_region_get_ram_ptr.
1869 * Called within RCU critical section.
1871 void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1873 RAMBlock *block = ram_block;
1875 if (block == NULL) {
1876 block = qemu_get_ram_block(addr);
1879 if (xen_enabled() && block->host == NULL) {
1880 /* We need to check if the requested address is in the RAM
1881 * because we don't want to map the entire memory in QEMU.
1882 * In that case just map until the end of the page.
1884 if (block->offset == 0) {
1885 return xen_map_cache(addr, 0, 0);
1888 block->host = xen_map_cache(block->offset, block->max_length, 1);
1890 return ramblock_ptr(block, addr - block->offset);
1893 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1894 * but takes a size argument.
1896 * Called within RCU critical section.
1898 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1899 hwaddr *size)
1901 RAMBlock *block = ram_block;
1902 ram_addr_t offset_inside_block;
1903 if (*size == 0) {
1904 return NULL;
1907 if (block == NULL) {
1908 block = qemu_get_ram_block(addr);
1910 offset_inside_block = addr - block->offset;
1911 *size = MIN(*size, block->max_length - offset_inside_block);
1913 if (xen_enabled() && block->host == NULL) {
1914 /* We need to check if the requested address is in the RAM
1915 * because we don't want to map the entire memory in QEMU.
1916 * In that case just map the requested area.
1918 if (block->offset == 0) {
1919 return xen_map_cache(addr, *size, 1);
1922 block->host = xen_map_cache(block->offset, block->max_length, 1);
1925 return ramblock_ptr(block, offset_inside_block);
1929 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1930 * in that RAMBlock.
1932 * ptr: Host pointer to look up
1933 * round_offset: If true round the result offset down to a page boundary
1934 * *ram_addr: set to result ram_addr
1935 * *offset: set to result offset within the RAMBlock
1937 * Returns: RAMBlock (or NULL if not found)
1939 * By the time this function returns, the returned pointer is not protected
1940 * by RCU anymore. If the caller is not within an RCU critical section and
1941 * does not hold the iothread lock, it must have other means of protecting the
1942 * pointer, such as a reference to the region that includes the incoming
1943 * ram_addr_t.
1945 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1946 ram_addr_t *ram_addr,
1947 ram_addr_t *offset)
1949 RAMBlock *block;
1950 uint8_t *host = ptr;
1952 if (xen_enabled()) {
1953 rcu_read_lock();
1954 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1955 block = qemu_get_ram_block(*ram_addr);
1956 if (block) {
1957 *offset = (host - block->host);
1959 rcu_read_unlock();
1960 return block;
1963 rcu_read_lock();
1964 block = atomic_rcu_read(&ram_list.mru_block);
1965 if (block && block->host && host - block->host < block->max_length) {
1966 goto found;
1969 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1970 /* This case append when the block is not mapped. */
1971 if (block->host == NULL) {
1972 continue;
1974 if (host - block->host < block->max_length) {
1975 goto found;
1979 rcu_read_unlock();
1980 return NULL;
1982 found:
1983 *offset = (host - block->host);
1984 if (round_offset) {
1985 *offset &= TARGET_PAGE_MASK;
1987 *ram_addr = block->offset + *offset;
1988 rcu_read_unlock();
1989 return block;
1993 * Finds the named RAMBlock
1995 * name: The name of RAMBlock to find
1997 * Returns: RAMBlock (or NULL if not found)
1999 RAMBlock *qemu_ram_block_by_name(const char *name)
2001 RAMBlock *block;
2003 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2004 if (!strcmp(name, block->idstr)) {
2005 return block;
2009 return NULL;
2012 /* Some of the softmmu routines need to translate from a host pointer
2013 (typically a TLB entry) back to a ram offset. */
2014 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2016 RAMBlock *block;
2017 ram_addr_t offset; /* Not used */
2019 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2021 if (!block) {
2022 return NULL;
2025 return block->mr;
2028 /* Called within RCU critical section. */
2029 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2030 uint64_t val, unsigned size)
2032 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2033 tb_invalidate_phys_page_fast(ram_addr, size);
2035 switch (size) {
2036 case 1:
2037 stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2038 break;
2039 case 2:
2040 stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2041 break;
2042 case 4:
2043 stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
2044 break;
2045 default:
2046 abort();
2048 /* Set both VGA and migration bits for simplicity and to remove
2049 * the notdirty callback faster.
2051 cpu_physical_memory_set_dirty_range(ram_addr, size,
2052 DIRTY_CLIENTS_NOCODE);
2053 /* we remove the notdirty callback only if the code has been
2054 flushed */
2055 if (!cpu_physical_memory_is_clean(ram_addr)) {
2056 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2060 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2061 unsigned size, bool is_write)
2063 return is_write;
2066 static const MemoryRegionOps notdirty_mem_ops = {
2067 .write = notdirty_mem_write,
2068 .valid.accepts = notdirty_mem_accepts,
2069 .endianness = DEVICE_NATIVE_ENDIAN,
2072 /* Generate a debug exception if a watchpoint has been hit. */
2073 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2075 CPUState *cpu = current_cpu;
2076 CPUClass *cc = CPU_GET_CLASS(cpu);
2077 CPUArchState *env = cpu->env_ptr;
2078 target_ulong pc, cs_base;
2079 target_ulong vaddr;
2080 CPUWatchpoint *wp;
2081 int cpu_flags;
2083 if (cpu->watchpoint_hit) {
2084 /* We re-entered the check after replacing the TB. Now raise
2085 * the debug interrupt so that is will trigger after the
2086 * current instruction. */
2087 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2088 return;
2090 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2091 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2092 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2093 && (wp->flags & flags)) {
2094 if (flags == BP_MEM_READ) {
2095 wp->flags |= BP_WATCHPOINT_HIT_READ;
2096 } else {
2097 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2099 wp->hitaddr = vaddr;
2100 wp->hitattrs = attrs;
2101 if (!cpu->watchpoint_hit) {
2102 if (wp->flags & BP_CPU &&
2103 !cc->debug_check_watchpoint(cpu, wp)) {
2104 wp->flags &= ~BP_WATCHPOINT_HIT;
2105 continue;
2107 cpu->watchpoint_hit = wp;
2108 tb_check_watchpoint(cpu);
2109 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2110 cpu->exception_index = EXCP_DEBUG;
2111 cpu_loop_exit(cpu);
2112 } else {
2113 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2114 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2115 cpu_resume_from_signal(cpu, NULL);
2118 } else {
2119 wp->flags &= ~BP_WATCHPOINT_HIT;
2124 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2125 so these check for a hit then pass through to the normal out-of-line
2126 phys routines. */
2127 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2128 unsigned size, MemTxAttrs attrs)
2130 MemTxResult res;
2131 uint64_t data;
2132 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2133 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2135 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2136 switch (size) {
2137 case 1:
2138 data = address_space_ldub(as, addr, attrs, &res);
2139 break;
2140 case 2:
2141 data = address_space_lduw(as, addr, attrs, &res);
2142 break;
2143 case 4:
2144 data = address_space_ldl(as, addr, attrs, &res);
2145 break;
2146 default: abort();
2148 *pdata = data;
2149 return res;
2152 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2153 uint64_t val, unsigned size,
2154 MemTxAttrs attrs)
2156 MemTxResult res;
2157 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2158 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2160 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2161 switch (size) {
2162 case 1:
2163 address_space_stb(as, addr, val, attrs, &res);
2164 break;
2165 case 2:
2166 address_space_stw(as, addr, val, attrs, &res);
2167 break;
2168 case 4:
2169 address_space_stl(as, addr, val, attrs, &res);
2170 break;
2171 default: abort();
2173 return res;
2176 static const MemoryRegionOps watch_mem_ops = {
2177 .read_with_attrs = watch_mem_read,
2178 .write_with_attrs = watch_mem_write,
2179 .endianness = DEVICE_NATIVE_ENDIAN,
2182 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2183 unsigned len, MemTxAttrs attrs)
2185 subpage_t *subpage = opaque;
2186 uint8_t buf[8];
2187 MemTxResult res;
2189 #if defined(DEBUG_SUBPAGE)
2190 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2191 subpage, len, addr);
2192 #endif
2193 res = address_space_read(subpage->as, addr + subpage->base,
2194 attrs, buf, len);
2195 if (res) {
2196 return res;
2198 switch (len) {
2199 case 1:
2200 *data = ldub_p(buf);
2201 return MEMTX_OK;
2202 case 2:
2203 *data = lduw_p(buf);
2204 return MEMTX_OK;
2205 case 4:
2206 *data = ldl_p(buf);
2207 return MEMTX_OK;
2208 case 8:
2209 *data = ldq_p(buf);
2210 return MEMTX_OK;
2211 default:
2212 abort();
2216 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2217 uint64_t value, unsigned len, MemTxAttrs attrs)
2219 subpage_t *subpage = opaque;
2220 uint8_t buf[8];
2222 #if defined(DEBUG_SUBPAGE)
2223 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2224 " value %"PRIx64"\n",
2225 __func__, subpage, len, addr, value);
2226 #endif
2227 switch (len) {
2228 case 1:
2229 stb_p(buf, value);
2230 break;
2231 case 2:
2232 stw_p(buf, value);
2233 break;
2234 case 4:
2235 stl_p(buf, value);
2236 break;
2237 case 8:
2238 stq_p(buf, value);
2239 break;
2240 default:
2241 abort();
2243 return address_space_write(subpage->as, addr + subpage->base,
2244 attrs, buf, len);
2247 static bool subpage_accepts(void *opaque, hwaddr addr,
2248 unsigned len, bool is_write)
2250 subpage_t *subpage = opaque;
2251 #if defined(DEBUG_SUBPAGE)
2252 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2253 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2254 #endif
2256 return address_space_access_valid(subpage->as, addr + subpage->base,
2257 len, is_write);
2260 static const MemoryRegionOps subpage_ops = {
2261 .read_with_attrs = subpage_read,
2262 .write_with_attrs = subpage_write,
2263 .impl.min_access_size = 1,
2264 .impl.max_access_size = 8,
2265 .valid.min_access_size = 1,
2266 .valid.max_access_size = 8,
2267 .valid.accepts = subpage_accepts,
2268 .endianness = DEVICE_NATIVE_ENDIAN,
2271 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2272 uint16_t section)
2274 int idx, eidx;
2276 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2277 return -1;
2278 idx = SUBPAGE_IDX(start);
2279 eidx = SUBPAGE_IDX(end);
2280 #if defined(DEBUG_SUBPAGE)
2281 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2282 __func__, mmio, start, end, idx, eidx, section);
2283 #endif
2284 for (; idx <= eidx; idx++) {
2285 mmio->sub_section[idx] = section;
2288 return 0;
2291 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2293 subpage_t *mmio;
2295 mmio = g_malloc0(sizeof(subpage_t));
2297 mmio->as = as;
2298 mmio->base = base;
2299 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2300 NULL, TARGET_PAGE_SIZE);
2301 mmio->iomem.subpage = true;
2302 #if defined(DEBUG_SUBPAGE)
2303 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2304 mmio, base, TARGET_PAGE_SIZE);
2305 #endif
2306 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2308 return mmio;
2311 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2312 MemoryRegion *mr)
2314 assert(as);
2315 MemoryRegionSection section = {
2316 .address_space = as,
2317 .mr = mr,
2318 .offset_within_address_space = 0,
2319 .offset_within_region = 0,
2320 .size = int128_2_64(),
2323 return phys_section_add(map, &section);
2326 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2328 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2329 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2330 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2331 MemoryRegionSection *sections = d->map.sections;
2333 return sections[index & ~TARGET_PAGE_MASK].mr;
2336 static void io_mem_init(void)
2338 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2339 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2340 NULL, UINT64_MAX);
2341 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2342 NULL, UINT64_MAX);
2343 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2344 NULL, UINT64_MAX);
2347 static void mem_begin(MemoryListener *listener)
2349 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2350 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2351 uint16_t n;
2353 n = dummy_section(&d->map, as, &io_mem_unassigned);
2354 assert(n == PHYS_SECTION_UNASSIGNED);
2355 n = dummy_section(&d->map, as, &io_mem_notdirty);
2356 assert(n == PHYS_SECTION_NOTDIRTY);
2357 n = dummy_section(&d->map, as, &io_mem_rom);
2358 assert(n == PHYS_SECTION_ROM);
2359 n = dummy_section(&d->map, as, &io_mem_watch);
2360 assert(n == PHYS_SECTION_WATCH);
2362 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2363 d->as = as;
2364 as->next_dispatch = d;
2367 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2369 phys_sections_free(&d->map);
2370 g_free(d);
2373 static void mem_commit(MemoryListener *listener)
2375 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2376 AddressSpaceDispatch *cur = as->dispatch;
2377 AddressSpaceDispatch *next = as->next_dispatch;
2379 phys_page_compact_all(next, next->map.nodes_nb);
2381 atomic_rcu_set(&as->dispatch, next);
2382 if (cur) {
2383 call_rcu(cur, address_space_dispatch_free, rcu);
2387 static void tcg_commit(MemoryListener *listener)
2389 CPUAddressSpace *cpuas;
2390 AddressSpaceDispatch *d;
2392 /* since each CPU stores ram addresses in its TLB cache, we must
2393 reset the modified entries */
2394 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2395 cpu_reloading_memory_map();
2396 /* The CPU and TLB are protected by the iothread lock.
2397 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2398 * may have split the RCU critical section.
2400 d = atomic_rcu_read(&cpuas->as->dispatch);
2401 cpuas->memory_dispatch = d;
2402 tlb_flush(cpuas->cpu, 1);
2405 void address_space_init_dispatch(AddressSpace *as)
2407 as->dispatch = NULL;
2408 as->dispatch_listener = (MemoryListener) {
2409 .begin = mem_begin,
2410 .commit = mem_commit,
2411 .region_add = mem_add,
2412 .region_nop = mem_add,
2413 .priority = 0,
2415 memory_listener_register(&as->dispatch_listener, as);
2418 void address_space_unregister(AddressSpace *as)
2420 memory_listener_unregister(&as->dispatch_listener);
2423 void address_space_destroy_dispatch(AddressSpace *as)
2425 AddressSpaceDispatch *d = as->dispatch;
2427 atomic_rcu_set(&as->dispatch, NULL);
2428 if (d) {
2429 call_rcu(d, address_space_dispatch_free, rcu);
2433 static void memory_map_init(void)
2435 system_memory = g_malloc(sizeof(*system_memory));
2437 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2438 address_space_init(&address_space_memory, system_memory, "memory");
2440 system_io = g_malloc(sizeof(*system_io));
2441 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2442 65536);
2443 address_space_init(&address_space_io, system_io, "I/O");
2446 MemoryRegion *get_system_memory(void)
2448 return system_memory;
2451 MemoryRegion *get_system_io(void)
2453 return system_io;
2456 #endif /* !defined(CONFIG_USER_ONLY) */
/* physical memory access (slow version, mainly for debug) */
2459 #if defined(CONFIG_USER_ONLY)
2460 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2461 uint8_t *buf, int len, int is_write)
2463 int l, flags;
2464 target_ulong page;
2465 void * p;
2467 while (len > 0) {
2468 page = addr & TARGET_PAGE_MASK;
2469 l = (page + TARGET_PAGE_SIZE) - addr;
2470 if (l > len)
2471 l = len;
2472 flags = page_get_flags(page);
2473 if (!(flags & PAGE_VALID))
2474 return -1;
2475 if (is_write) {
2476 if (!(flags & PAGE_WRITE))
2477 return -1;
2478 /* XXX: this code should not depend on lock_user */
2479 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2480 return -1;
2481 memcpy(p, buf, l);
2482 unlock_user(p, addr, l);
2483 } else {
2484 if (!(flags & PAGE_READ))
2485 return -1;
2486 /* XXX: this code should not depend on lock_user */
2487 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2488 return -1;
2489 memcpy(buf, p, l);
2490 unlock_user(p, addr, 0);
2492 len -= l;
2493 buf += l;
2494 addr += l;
2496 return 0;
2499 #else
2501 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2502 hwaddr length)
2504 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2505 /* No early return if dirty_log_mask is or becomes 0, because
2506 * cpu_physical_memory_set_dirty_range will still call
2507 * xen_modified_memory.
2509 if (dirty_log_mask) {
2510 dirty_log_mask =
2511 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2513 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2514 tb_invalidate_phys_range(addr, addr + length);
2515 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2517 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2520 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2522 unsigned access_size_max = mr->ops->valid.max_access_size;
2524 /* Regions are assumed to support 1-4 byte accesses unless
2525 otherwise specified. */
2526 if (access_size_max == 0) {
2527 access_size_max = 4;
2530 /* Bound the maximum access by the alignment of the address. */
2531 if (!mr->ops->impl.unaligned) {
2532 unsigned align_size_max = addr & -addr;
2533 if (align_size_max != 0 && align_size_max < access_size_max) {
2534 access_size_max = align_size_max;
2538 /* Don't attempt accesses larger than the maximum. */
2539 if (l > access_size_max) {
2540 l = access_size_max;
2542 l = pow2floor(l);
2544 return l;
2547 static bool prepare_mmio_access(MemoryRegion *mr)
2549 bool unlocked = !qemu_mutex_iothread_locked();
2550 bool release_lock = false;
2552 if (unlocked && mr->global_locking) {
2553 qemu_mutex_lock_iothread();
2554 unlocked = false;
2555 release_lock = true;
2557 if (mr->flush_coalesced_mmio) {
2558 if (unlocked) {
2559 qemu_mutex_lock_iothread();
2561 qemu_flush_coalesced_mmio_buffer();
2562 if (unlocked) {
2563 qemu_mutex_unlock_iothread();
2567 return release_lock;
2570 /* Called within RCU critical section. */
2571 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2572 MemTxAttrs attrs,
2573 const uint8_t *buf,
2574 int len, hwaddr addr1,
2575 hwaddr l, MemoryRegion *mr)
2577 uint8_t *ptr;
2578 uint64_t val;
2579 MemTxResult result = MEMTX_OK;
2580 bool release_lock = false;
2582 for (;;) {
2583 if (!memory_access_is_direct(mr, true)) {
2584 release_lock |= prepare_mmio_access(mr);
2585 l = memory_access_size(mr, l, addr1);
2586 /* XXX: could force current_cpu to NULL to avoid
2587 potential bugs */
2588 switch (l) {
2589 case 8:
2590 /* 64 bit write access */
2591 val = ldq_p(buf);
2592 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2593 attrs);
2594 break;
2595 case 4:
2596 /* 32 bit write access */
2597 val = ldl_p(buf);
2598 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2599 attrs);
2600 break;
2601 case 2:
2602 /* 16 bit write access */
2603 val = lduw_p(buf);
2604 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2605 attrs);
2606 break;
2607 case 1:
2608 /* 8 bit write access */
2609 val = ldub_p(buf);
2610 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2611 attrs);
2612 break;
2613 default:
2614 abort();
2616 } else {
2617 addr1 += memory_region_get_ram_addr(mr);
2618 /* RAM case */
2619 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2620 memcpy(ptr, buf, l);
2621 invalidate_and_set_dirty(mr, addr1, l);
2624 if (release_lock) {
2625 qemu_mutex_unlock_iothread();
2626 release_lock = false;
2629 len -= l;
2630 buf += l;
2631 addr += l;
2633 if (!len) {
2634 break;
2637 l = len;
2638 mr = address_space_translate(as, addr, &addr1, &l, true);
2641 return result;
2644 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2645 const uint8_t *buf, int len)
2647 hwaddr l;
2648 hwaddr addr1;
2649 MemoryRegion *mr;
2650 MemTxResult result = MEMTX_OK;
2652 if (len > 0) {
2653 rcu_read_lock();
2654 l = len;
2655 mr = address_space_translate(as, addr, &addr1, &l, true);
2656 result = address_space_write_continue(as, addr, attrs, buf, len,
2657 addr1, l, mr);
2658 rcu_read_unlock();
2661 return result;
2664 /* Called within RCU critical section. */
2665 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2666 MemTxAttrs attrs, uint8_t *buf,
2667 int len, hwaddr addr1, hwaddr l,
2668 MemoryRegion *mr)
2670 uint8_t *ptr;
2671 uint64_t val;
2672 MemTxResult result = MEMTX_OK;
2673 bool release_lock = false;
2675 for (;;) {
2676 if (!memory_access_is_direct(mr, false)) {
2677 /* I/O case */
2678 release_lock |= prepare_mmio_access(mr);
2679 l = memory_access_size(mr, l, addr1);
2680 switch (l) {
2681 case 8:
2682 /* 64 bit read access */
2683 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2684 attrs);
2685 stq_p(buf, val);
2686 break;
2687 case 4:
2688 /* 32 bit read access */
2689 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2690 attrs);
2691 stl_p(buf, val);
2692 break;
2693 case 2:
2694 /* 16 bit read access */
2695 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2696 attrs);
2697 stw_p(buf, val);
2698 break;
2699 case 1:
2700 /* 8 bit read access */
2701 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2702 attrs);
2703 stb_p(buf, val);
2704 break;
2705 default:
2706 abort();
2708 } else {
2709 /* RAM case */
2710 ptr = qemu_get_ram_ptr(mr->ram_block, mr->ram_addr + addr1);
2711 memcpy(buf, ptr, l);
2714 if (release_lock) {
2715 qemu_mutex_unlock_iothread();
2716 release_lock = false;
2719 len -= l;
2720 buf += l;
2721 addr += l;
2723 if (!len) {
2724 break;
2727 l = len;
2728 mr = address_space_translate(as, addr, &addr1, &l, false);
2731 return result;
2734 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2735 MemTxAttrs attrs, uint8_t *buf, int len)
2737 hwaddr l;
2738 hwaddr addr1;
2739 MemoryRegion *mr;
2740 MemTxResult result = MEMTX_OK;
2742 if (len > 0) {
2743 rcu_read_lock();
2744 l = len;
2745 mr = address_space_translate(as, addr, &addr1, &l, false);
2746 result = address_space_read_continue(as, addr, attrs, buf, len,
2747 addr1, l, mr);
2748 rcu_read_unlock();
2751 return result;
2754 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2755 uint8_t *buf, int len, bool is_write)
2757 if (is_write) {
2758 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2759 } else {
2760 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2764 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2765 int len, int is_write)
2767 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2768 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer in and mark the range dirty */
    FLUSH_CACHE,    /* flush the host icache for the range */
};
2776 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2777 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2779 hwaddr l;
2780 uint8_t *ptr;
2781 hwaddr addr1;
2782 MemoryRegion *mr;
2784 rcu_read_lock();
2785 while (len > 0) {
2786 l = len;
2787 mr = address_space_translate(as, addr, &addr1, &l, true);
2789 if (!(memory_region_is_ram(mr) ||
2790 memory_region_is_romd(mr))) {
2791 l = memory_access_size(mr, l, addr1);
2792 } else {
2793 addr1 += memory_region_get_ram_addr(mr);
2794 /* ROM/RAM case */
2795 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
2796 switch (type) {
2797 case WRITE_DATA:
2798 memcpy(ptr, buf, l);
2799 invalidate_and_set_dirty(mr, addr1, l);
2800 break;
2801 case FLUSH_CACHE:
2802 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2803 break;
2806 len -= l;
2807 buf += l;
2808 addr += l;
2810 rcu_read_unlock();
2813 /* used for ROM loading : can write in RAM and ROM */
2814 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2815 const uint8_t *buf, int len)
2817 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2820 void cpu_flush_icache_range(hwaddr start, int len)
2823 * This function should do the same thing as an icache flush that was
2824 * triggered from within the guest. For TCG we are always cache coherent,
2825 * so there is no need to flush anything. For KVM / Xen we need to flush
2826 * the host's instruction cache at least.
2828 if (tcg_enabled()) {
2829 return;
2832 cpu_physical_memory_write_rom_internal(&address_space_memory,
2833 start, NULL, len, FLUSH_CACHE);
2836 typedef struct {
2837 MemoryRegion *mr;
2838 void *buffer;
2839 hwaddr addr;
2840 hwaddr len;
2841 bool in_use;
2842 } BounceBuffer;
2844 static BounceBuffer bounce;
2846 typedef struct MapClient {
2847 QEMUBH *bh;
2848 QLIST_ENTRY(MapClient) link;
2849 } MapClient;
2851 QemuMutex map_client_list_lock;
2852 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2853 = QLIST_HEAD_INITIALIZER(map_client_list);
2855 static void cpu_unregister_map_client_do(MapClient *client)
2857 QLIST_REMOVE(client, link);
2858 g_free(client);
2861 static void cpu_notify_map_clients_locked(void)
2863 MapClient *client;
2865 while (!QLIST_EMPTY(&map_client_list)) {
2866 client = QLIST_FIRST(&map_client_list);
2867 qemu_bh_schedule(client->bh);
2868 cpu_unregister_map_client_do(client);
2872 void cpu_register_map_client(QEMUBH *bh)
2874 MapClient *client = g_malloc(sizeof(*client));
2876 qemu_mutex_lock(&map_client_list_lock);
2877 client->bh = bh;
2878 QLIST_INSERT_HEAD(&map_client_list, client, link);
2879 if (!atomic_read(&bounce.in_use)) {
2880 cpu_notify_map_clients_locked();
2882 qemu_mutex_unlock(&map_client_list_lock);
2885 void cpu_exec_init_all(void)
2887 qemu_mutex_init(&ram_list.mutex);
2888 io_mem_init();
2889 memory_map_init();
2890 qemu_mutex_init(&map_client_list_lock);
2893 void cpu_unregister_map_client(QEMUBH *bh)
2895 MapClient *client;
2897 qemu_mutex_lock(&map_client_list_lock);
2898 QLIST_FOREACH(client, &map_client_list, link) {
2899 if (client->bh == bh) {
2900 cpu_unregister_map_client_do(client);
2901 break;
2904 qemu_mutex_unlock(&map_client_list_lock);
2907 static void cpu_notify_map_clients(void)
2909 qemu_mutex_lock(&map_client_list_lock);
2910 cpu_notify_map_clients_locked();
2911 qemu_mutex_unlock(&map_client_list_lock);
2914 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2916 MemoryRegion *mr;
2917 hwaddr l, xlat;
2919 rcu_read_lock();
2920 while (len > 0) {
2921 l = len;
2922 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2923 if (!memory_access_is_direct(mr, is_write)) {
2924 l = memory_access_size(mr, l, addr);
2925 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2926 return false;
2930 len -= l;
2931 addr += l;
2933 rcu_read_unlock();
2934 return true;
2937 /* Map a physical memory region into a host virtual address.
2938 * May map a subset of the requested range, given by and returned in *plen.
2939 * May return NULL if resources needed to perform the mapping are exhausted.
2940 * Use only for reads OR writes - not for read-modify-write operations.
2941 * Use cpu_register_map_client() to know when retrying the map operation is
2942 * likely to succeed.
2944 void *address_space_map(AddressSpace *as,
2945 hwaddr addr,
2946 hwaddr *plen,
2947 bool is_write)
2949 hwaddr len = *plen;
2950 hwaddr done = 0;
2951 hwaddr l, xlat, base;
2952 MemoryRegion *mr, *this_mr;
2953 ram_addr_t raddr;
2954 void *ptr;
2956 if (len == 0) {
2957 return NULL;
2960 l = len;
2961 rcu_read_lock();
2962 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2964 if (!memory_access_is_direct(mr, is_write)) {
2965 if (atomic_xchg(&bounce.in_use, true)) {
2966 rcu_read_unlock();
2967 return NULL;
2969 /* Avoid unbounded allocations */
2970 l = MIN(l, TARGET_PAGE_SIZE);
2971 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2972 bounce.addr = addr;
2973 bounce.len = l;
2975 memory_region_ref(mr);
2976 bounce.mr = mr;
2977 if (!is_write) {
2978 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2979 bounce.buffer, l);
2982 rcu_read_unlock();
2983 *plen = l;
2984 return bounce.buffer;
2987 base = xlat;
2988 raddr = memory_region_get_ram_addr(mr);
2990 for (;;) {
2991 len -= l;
2992 addr += l;
2993 done += l;
2994 if (len == 0) {
2995 break;
2998 l = len;
2999 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
3000 if (this_mr != mr || xlat != base + done) {
3001 break;
3005 memory_region_ref(mr);
3006 *plen = done;
3007 ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
3008 rcu_read_unlock();
3010 return ptr;
3013 /* Unmaps a memory region previously mapped by address_space_map().
3014 * Will also mark the memory as dirty if is_write == 1. access_len gives
3015 * the amount of memory that was actually read or written by the caller.
3017 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3018 int is_write, hwaddr access_len)
3020 if (buffer != bounce.buffer) {
3021 MemoryRegion *mr;
3022 ram_addr_t addr1;
3024 mr = qemu_ram_addr_from_host(buffer, &addr1);
3025 assert(mr != NULL);
3026 if (is_write) {
3027 invalidate_and_set_dirty(mr, addr1, access_len);
3029 if (xen_enabled()) {
3030 xen_invalidate_map_cache_entry(buffer);
3032 memory_region_unref(mr);
3033 return;
3035 if (is_write) {
3036 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3037 bounce.buffer, access_len);
3039 qemu_vfree(bounce.buffer);
3040 bounce.buffer = NULL;
3041 memory_region_unref(bounce.mr);
3042 atomic_mb_set(&bounce.in_use, false);
3043 cpu_notify_map_clients();
3046 void *cpu_physical_memory_map(hwaddr addr,
3047 hwaddr *plen,
3048 int is_write)
3050 return address_space_map(&address_space_memory, addr, plen, is_write);
3053 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3054 int is_write, hwaddr access_len)
3056 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3059 /* warning: addr must be aligned */
3060 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3061 MemTxAttrs attrs,
3062 MemTxResult *result,
3063 enum device_endian endian)
3065 uint8_t *ptr;
3066 uint64_t val;
3067 MemoryRegion *mr;
3068 hwaddr l = 4;
3069 hwaddr addr1;
3070 MemTxResult r;
3071 bool release_lock = false;
3073 rcu_read_lock();
3074 mr = address_space_translate(as, addr, &addr1, &l, false);
3075 if (l < 4 || !memory_access_is_direct(mr, false)) {
3076 release_lock |= prepare_mmio_access(mr);
3078 /* I/O case */
3079 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3080 #if defined(TARGET_WORDS_BIGENDIAN)
3081 if (endian == DEVICE_LITTLE_ENDIAN) {
3082 val = bswap32(val);
3084 #else
3085 if (endian == DEVICE_BIG_ENDIAN) {
3086 val = bswap32(val);
3088 #endif
3089 } else {
3090 /* RAM case */
3091 ptr = qemu_get_ram_ptr(mr->ram_block,
3092 (memory_region_get_ram_addr(mr)
3093 & TARGET_PAGE_MASK)
3094 + addr1);
3095 switch (endian) {
3096 case DEVICE_LITTLE_ENDIAN:
3097 val = ldl_le_p(ptr);
3098 break;
3099 case DEVICE_BIG_ENDIAN:
3100 val = ldl_be_p(ptr);
3101 break;
3102 default:
3103 val = ldl_p(ptr);
3104 break;
3106 r = MEMTX_OK;
3108 if (result) {
3109 *result = r;
3111 if (release_lock) {
3112 qemu_mutex_unlock_iothread();
3114 rcu_read_unlock();
3115 return val;
3118 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3119 MemTxAttrs attrs, MemTxResult *result)
3121 return address_space_ldl_internal(as, addr, attrs, result,
3122 DEVICE_NATIVE_ENDIAN);
3125 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3126 MemTxAttrs attrs, MemTxResult *result)
3128 return address_space_ldl_internal(as, addr, attrs, result,
3129 DEVICE_LITTLE_ENDIAN);
3132 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3133 MemTxAttrs attrs, MemTxResult *result)
3135 return address_space_ldl_internal(as, addr, attrs, result,
3136 DEVICE_BIG_ENDIAN);
3139 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3141 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3144 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3146 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3149 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3151 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3154 /* warning: addr must be aligned */
3155 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3156 MemTxAttrs attrs,
3157 MemTxResult *result,
3158 enum device_endian endian)
3160 uint8_t *ptr;
3161 uint64_t val;
3162 MemoryRegion *mr;
3163 hwaddr l = 8;
3164 hwaddr addr1;
3165 MemTxResult r;
3166 bool release_lock = false;
3168 rcu_read_lock();
3169 mr = address_space_translate(as, addr, &addr1, &l,
3170 false);
3171 if (l < 8 || !memory_access_is_direct(mr, false)) {
3172 release_lock |= prepare_mmio_access(mr);
3174 /* I/O case */
3175 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3176 #if defined(TARGET_WORDS_BIGENDIAN)
3177 if (endian == DEVICE_LITTLE_ENDIAN) {
3178 val = bswap64(val);
3180 #else
3181 if (endian == DEVICE_BIG_ENDIAN) {
3182 val = bswap64(val);
3184 #endif
3185 } else {
3186 /* RAM case */
3187 ptr = qemu_get_ram_ptr(mr->ram_block,
3188 (memory_region_get_ram_addr(mr)
3189 & TARGET_PAGE_MASK)
3190 + addr1);
3191 switch (endian) {
3192 case DEVICE_LITTLE_ENDIAN:
3193 val = ldq_le_p(ptr);
3194 break;
3195 case DEVICE_BIG_ENDIAN:
3196 val = ldq_be_p(ptr);
3197 break;
3198 default:
3199 val = ldq_p(ptr);
3200 break;
3202 r = MEMTX_OK;
3204 if (result) {
3205 *result = r;
3207 if (release_lock) {
3208 qemu_mutex_unlock_iothread();
3210 rcu_read_unlock();
3211 return val;
3214 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3215 MemTxAttrs attrs, MemTxResult *result)
3217 return address_space_ldq_internal(as, addr, attrs, result,
3218 DEVICE_NATIVE_ENDIAN);
3221 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3222 MemTxAttrs attrs, MemTxResult *result)
3224 return address_space_ldq_internal(as, addr, attrs, result,
3225 DEVICE_LITTLE_ENDIAN);
3228 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3229 MemTxAttrs attrs, MemTxResult *result)
3231 return address_space_ldq_internal(as, addr, attrs, result,
3232 DEVICE_BIG_ENDIAN);
3235 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3237 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3240 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3242 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3245 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3247 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3250 /* XXX: optimize */
3251 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3252 MemTxAttrs attrs, MemTxResult *result)
3254 uint8_t val;
3255 MemTxResult r;
3257 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3258 if (result) {
3259 *result = r;
3261 return val;
3264 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3266 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3269 /* warning: addr must be aligned */
3270 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3271 hwaddr addr,
3272 MemTxAttrs attrs,
3273 MemTxResult *result,
3274 enum device_endian endian)
3276 uint8_t *ptr;
3277 uint64_t val;
3278 MemoryRegion *mr;
3279 hwaddr l = 2;
3280 hwaddr addr1;
3281 MemTxResult r;
3282 bool release_lock = false;
3284 rcu_read_lock();
3285 mr = address_space_translate(as, addr, &addr1, &l,
3286 false);
3287 if (l < 2 || !memory_access_is_direct(mr, false)) {
3288 release_lock |= prepare_mmio_access(mr);
3290 /* I/O case */
3291 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3292 #if defined(TARGET_WORDS_BIGENDIAN)
3293 if (endian == DEVICE_LITTLE_ENDIAN) {
3294 val = bswap16(val);
3296 #else
3297 if (endian == DEVICE_BIG_ENDIAN) {
3298 val = bswap16(val);
3300 #endif
3301 } else {
3302 /* RAM case */
3303 ptr = qemu_get_ram_ptr(mr->ram_block,
3304 (memory_region_get_ram_addr(mr)
3305 & TARGET_PAGE_MASK)
3306 + addr1);
3307 switch (endian) {
3308 case DEVICE_LITTLE_ENDIAN:
3309 val = lduw_le_p(ptr);
3310 break;
3311 case DEVICE_BIG_ENDIAN:
3312 val = lduw_be_p(ptr);
3313 break;
3314 default:
3315 val = lduw_p(ptr);
3316 break;
3318 r = MEMTX_OK;
3320 if (result) {
3321 *result = r;
3323 if (release_lock) {
3324 qemu_mutex_unlock_iothread();
3326 rcu_read_unlock();
3327 return val;
3330 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3331 MemTxAttrs attrs, MemTxResult *result)
3333 return address_space_lduw_internal(as, addr, attrs, result,
3334 DEVICE_NATIVE_ENDIAN);
3337 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3338 MemTxAttrs attrs, MemTxResult *result)
3340 return address_space_lduw_internal(as, addr, attrs, result,
3341 DEVICE_LITTLE_ENDIAN);
3344 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3345 MemTxAttrs attrs, MemTxResult *result)
3347 return address_space_lduw_internal(as, addr, attrs, result,
3348 DEVICE_BIG_ENDIAN);
3351 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3353 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3356 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3358 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3361 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3363 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3366 /* warning: addr must be aligned. The ram page is not masked as dirty
3367 and the code inside is not invalidated. It is useful if the dirty
3368 bits are used to track modified PTEs */
3369 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3370 MemTxAttrs attrs, MemTxResult *result)
3372 uint8_t *ptr;
3373 MemoryRegion *mr;
3374 hwaddr l = 4;
3375 hwaddr addr1;
3376 MemTxResult r;
3377 uint8_t dirty_log_mask;
3378 bool release_lock = false;
3380 rcu_read_lock();
3381 mr = address_space_translate(as, addr, &addr1, &l,
3382 true);
3383 if (l < 4 || !memory_access_is_direct(mr, true)) {
3384 release_lock |= prepare_mmio_access(mr);
3386 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3387 } else {
3388 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3389 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3390 stl_p(ptr, val);
3392 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3393 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3394 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3395 r = MEMTX_OK;
3397 if (result) {
3398 *result = r;
3400 if (release_lock) {
3401 qemu_mutex_unlock_iothread();
3403 rcu_read_unlock();
3406 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3408 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3411 /* warning: addr must be aligned */
3412 static inline void address_space_stl_internal(AddressSpace *as,
3413 hwaddr addr, uint32_t val,
3414 MemTxAttrs attrs,
3415 MemTxResult *result,
3416 enum device_endian endian)
3418 uint8_t *ptr;
3419 MemoryRegion *mr;
3420 hwaddr l = 4;
3421 hwaddr addr1;
3422 MemTxResult r;
3423 bool release_lock = false;
3425 rcu_read_lock();
3426 mr = address_space_translate(as, addr, &addr1, &l,
3427 true);
3428 if (l < 4 || !memory_access_is_direct(mr, true)) {
3429 release_lock |= prepare_mmio_access(mr);
3431 #if defined(TARGET_WORDS_BIGENDIAN)
3432 if (endian == DEVICE_LITTLE_ENDIAN) {
3433 val = bswap32(val);
3435 #else
3436 if (endian == DEVICE_BIG_ENDIAN) {
3437 val = bswap32(val);
3439 #endif
3440 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3441 } else {
3442 /* RAM case */
3443 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3444 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3445 switch (endian) {
3446 case DEVICE_LITTLE_ENDIAN:
3447 stl_le_p(ptr, val);
3448 break;
3449 case DEVICE_BIG_ENDIAN:
3450 stl_be_p(ptr, val);
3451 break;
3452 default:
3453 stl_p(ptr, val);
3454 break;
3456 invalidate_and_set_dirty(mr, addr1, 4);
3457 r = MEMTX_OK;
3459 if (result) {
3460 *result = r;
3462 if (release_lock) {
3463 qemu_mutex_unlock_iothread();
3465 rcu_read_unlock();
3468 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3469 MemTxAttrs attrs, MemTxResult *result)
3471 address_space_stl_internal(as, addr, val, attrs, result,
3472 DEVICE_NATIVE_ENDIAN);
3475 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3476 MemTxAttrs attrs, MemTxResult *result)
3478 address_space_stl_internal(as, addr, val, attrs, result,
3479 DEVICE_LITTLE_ENDIAN);
3482 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3483 MemTxAttrs attrs, MemTxResult *result)
3485 address_space_stl_internal(as, addr, val, attrs, result,
3486 DEVICE_BIG_ENDIAN);
3489 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3491 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3494 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3496 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3499 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3501 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3504 /* XXX: optimize */
3505 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3506 MemTxAttrs attrs, MemTxResult *result)
3508 uint8_t v = val;
3509 MemTxResult r;
3511 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3512 if (result) {
3513 *result = r;
3517 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3519 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3522 /* warning: addr must be aligned */
3523 static inline void address_space_stw_internal(AddressSpace *as,
3524 hwaddr addr, uint32_t val,
3525 MemTxAttrs attrs,
3526 MemTxResult *result,
3527 enum device_endian endian)
3529 uint8_t *ptr;
3530 MemoryRegion *mr;
3531 hwaddr l = 2;
3532 hwaddr addr1;
3533 MemTxResult r;
3534 bool release_lock = false;
3536 rcu_read_lock();
3537 mr = address_space_translate(as, addr, &addr1, &l, true);
3538 if (l < 2 || !memory_access_is_direct(mr, true)) {
3539 release_lock |= prepare_mmio_access(mr);
3541 #if defined(TARGET_WORDS_BIGENDIAN)
3542 if (endian == DEVICE_LITTLE_ENDIAN) {
3543 val = bswap16(val);
3545 #else
3546 if (endian == DEVICE_BIG_ENDIAN) {
3547 val = bswap16(val);
3549 #endif
3550 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3551 } else {
3552 /* RAM case */
3553 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3554 ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
3555 switch (endian) {
3556 case DEVICE_LITTLE_ENDIAN:
3557 stw_le_p(ptr, val);
3558 break;
3559 case DEVICE_BIG_ENDIAN:
3560 stw_be_p(ptr, val);
3561 break;
3562 default:
3563 stw_p(ptr, val);
3564 break;
3566 invalidate_and_set_dirty(mr, addr1, 2);
3567 r = MEMTX_OK;
3569 if (result) {
3570 *result = r;
3572 if (release_lock) {
3573 qemu_mutex_unlock_iothread();
3575 rcu_read_unlock();
3578 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3579 MemTxAttrs attrs, MemTxResult *result)
3581 address_space_stw_internal(as, addr, val, attrs, result,
3582 DEVICE_NATIVE_ENDIAN);
3585 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3586 MemTxAttrs attrs, MemTxResult *result)
3588 address_space_stw_internal(as, addr, val, attrs, result,
3589 DEVICE_LITTLE_ENDIAN);
3592 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3593 MemTxAttrs attrs, MemTxResult *result)
3595 address_space_stw_internal(as, addr, val, attrs, result,
3596 DEVICE_BIG_ENDIAN);
3599 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3601 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3604 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3606 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3609 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3611 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3614 /* XXX: optimize */
3615 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3616 MemTxAttrs attrs, MemTxResult *result)
3618 MemTxResult r;
3619 val = tswap64(val);
3620 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3621 if (result) {
3622 *result = r;
3626 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3627 MemTxAttrs attrs, MemTxResult *result)
3629 MemTxResult r;
3630 val = cpu_to_le64(val);
3631 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3632 if (result) {
3633 *result = r;
3636 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3637 MemTxAttrs attrs, MemTxResult *result)
3639 MemTxResult r;
3640 val = cpu_to_be64(val);
3641 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3642 if (result) {
3643 *result = r;
3647 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3649 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3652 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3654 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3657 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3659 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3662 /* virtual memory access for debug (includes writing to ROM) */
3663 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3664 uint8_t *buf, int len, int is_write)
3666 int l;
3667 hwaddr phys_addr;
3668 target_ulong page;
3670 while (len > 0) {
3671 int asidx;
3672 MemTxAttrs attrs;
3674 page = addr & TARGET_PAGE_MASK;
3675 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3676 asidx = cpu_asidx_from_attrs(cpu, attrs);
3677 /* if no physical page mapped, return an error */
3678 if (phys_addr == -1)
3679 return -1;
3680 l = (page + TARGET_PAGE_SIZE) - addr;
3681 if (l > len)
3682 l = len;
3683 phys_addr += (addr & ~TARGET_PAGE_MASK);
3684 if (is_write) {
3685 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3686 phys_addr, buf, l);
3687 } else {
3688 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3689 MEMTXATTRS_UNSPECIFIED,
3690 buf, l, 0);
3692 len -= l;
3693 buf += l;
3694 addr += l;
3696 return 0;
3700 * Allows code that needs to deal with migration bitmaps etc to still be built
3701 * target independent.
3703 size_t qemu_target_page_bits(void)
3705 return TARGET_PAGE_BITS;
3708 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true exactly when TARGET_WORDS_BIGENDIAN is defined at build time.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3724 #ifndef CONFIG_USER_ONLY
3725 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3727 MemoryRegion*mr;
3728 hwaddr l = 1;
3729 bool res;
3731 rcu_read_lock();
3732 mr = address_space_translate(&address_space_memory,
3733 phys_addr, &phys_addr, &l, false);
3735 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3736 rcu_read_unlock();
3737 return res;
3740 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3742 RAMBlock *block;
3743 int ret = 0;
3745 rcu_read_lock();
3746 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3747 ret = func(block->idstr, block->host, block->offset,
3748 block->used_length, opaque);
3749 if (ret) {
3750 break;
3753 rcu_read_unlock();
3754 return ret;
3756 #endif