ide: replace blk_drain_all by blk_drain
[qemu.git] / exec.c
blob7d67c11601a09f1669a361a1d4282ee55aba2d8c
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #ifndef _WIN32
21 #include <sys/mman.h>
22 #endif
24 #include "qemu-common.h"
25 #include "cpu.h"
26 #include "tcg.h"
27 #include "hw/hw.h"
28 #if !defined(CONFIG_USER_ONLY)
29 #include "hw/boards.h"
30 #endif
31 #include "hw/qdev.h"
32 #include "sysemu/kvm.h"
33 #include "sysemu/sysemu.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48 #include "qemu/rcu_queue.h"
49 #include "qemu/main-loop.h"
50 #include "translate-all.h"
51 #include "sysemu/replay.h"
53 #include "exec/memory-internal.h"
54 #include "exec/ram_addr.h"
55 #include "exec/log.h"
57 #include "qemu/range.h"
58 #ifndef _WIN32
59 #include "qemu/mmap-alloc.h"
60 #endif
62 //#define DEBUG_SUBPAGE
64 #if !defined(CONFIG_USER_ONLY)
65 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
66 * are protected by the ramlist lock.
68 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
70 static MemoryRegion *system_memory;
71 static MemoryRegion *system_io;
73 AddressSpace address_space_io;
74 AddressSpace address_space_memory;
76 MemoryRegion io_mem_rom, io_mem_notdirty;
77 static MemoryRegion io_mem_unassigned;
79 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
80 #define RAM_PREALLOC (1 << 0)
82 /* RAM is mmap-ed with MAP_SHARED */
83 #define RAM_SHARED (1 << 1)
85 /* Only a portion of RAM (used_length) is actually used, and migrated.
86 * This used_length size can change across reboots.
88 #define RAM_RESIZEABLE (1 << 2)
90 #endif
92 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
93 /* current CPU in the current thread. It is only valid inside
94 cpu_exec() */
95 __thread CPUState *current_cpu;
96 /* 0 = Do not count executed instructions.
97 1 = Precise instruction counting.
98 2 = Adaptive rate instruction counting. */
99 int use_icount;
101 #if !defined(CONFIG_USER_ONLY)
103 typedef struct PhysPageEntry PhysPageEntry;
/* One slot of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next node (in units of
     * L2_SIZE); zero marks a leaf entry.
     */
    uint32_t skip : 6;
    /* For a leaf (!skip): index into phys_sections.
     * Otherwise (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
112 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
114 /* Size of the L2 (and L3, etc) page tables. */
115 #define ADDR_SPACE_BITS 64
117 #define P_L2_BITS 9
118 #define P_L2_SIZE (1 << P_L2_BITS)
120 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
122 typedef PhysPageEntry Node[P_L2_SIZE];
124 typedef struct PhysPageMap {
125 struct rcu_head rcu;
127 unsigned sections_nb;
128 unsigned sections_nb_alloc;
129 unsigned nodes_nb;
130 unsigned nodes_nb_alloc;
131 Node *nodes;
132 MemoryRegionSection *sections;
133 } PhysPageMap;
135 struct AddressSpaceDispatch {
136 struct rcu_head rcu;
138 /* This is a multi-level map on the physical address space.
139 * The bottom level has pointers to MemoryRegionSections.
141 PhysPageEntry phys_map;
142 PhysPageMap map;
143 AddressSpace *as;
146 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
147 typedef struct subpage_t {
148 MemoryRegion iomem;
149 AddressSpace *as;
150 hwaddr base;
151 uint16_t sub_section[TARGET_PAGE_SIZE];
152 } subpage_t;
154 #define PHYS_SECTION_UNASSIGNED 0
155 #define PHYS_SECTION_NOTDIRTY 1
156 #define PHYS_SECTION_ROM 2
157 #define PHYS_SECTION_WATCH 3
159 static void io_mem_init(void);
160 static void memory_map_init(void);
161 static void tcg_commit(MemoryListener *listener);
163 static MemoryRegion io_mem_watch;
166 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
167 * @cpu: the CPU whose AddressSpace this is
168 * @as: the AddressSpace itself
169 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
170 * @tcg_as_listener: listener for tracking changes to the AddressSpace
172 struct CPUAddressSpace {
173 CPUState *cpu;
174 AddressSpace *as;
175 struct AddressSpaceDispatch *memory_dispatch;
176 MemoryListener tcg_as_listener;
179 #endif
181 #if !defined(CONFIG_USER_ONLY)
183 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
185 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
186 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
188 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
192 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
194 unsigned i;
195 uint32_t ret;
196 PhysPageEntry e;
197 PhysPageEntry *p;
199 ret = map->nodes_nb++;
200 p = map->nodes[ret];
201 assert(ret != PHYS_MAP_NODE_NIL);
202 assert(ret != map->nodes_nb_alloc);
204 e.skip = leaf ? 0 : 1;
205 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
206 for (i = 0; i < P_L2_SIZE; ++i) {
207 memcpy(&p[i], &e, sizeof(e));
209 return ret;
212 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
213 hwaddr *index, hwaddr *nb, uint16_t leaf,
214 int level)
216 PhysPageEntry *p;
217 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
219 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
220 lp->ptr = phys_map_node_alloc(map, level == 0);
222 p = map->nodes[lp->ptr];
223 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
225 while (*nb && lp < &p[P_L2_SIZE]) {
226 if ((*index & (step - 1)) == 0 && *nb >= step) {
227 lp->skip = 0;
228 lp->ptr = leaf;
229 *index += step;
230 *nb -= step;
231 } else {
232 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
234 ++lp;
238 static void phys_page_set(AddressSpaceDispatch *d,
239 hwaddr index, hwaddr nb,
240 uint16_t leaf)
242 /* Wildly overreserve - it doesn't matter much. */
243 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
245 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
248 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
249 * and update our entry so we can skip it and go directly to the destination.
251 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
253 unsigned valid_ptr = P_L2_SIZE;
254 int valid = 0;
255 PhysPageEntry *p;
256 int i;
258 if (lp->ptr == PHYS_MAP_NODE_NIL) {
259 return;
262 p = nodes[lp->ptr];
263 for (i = 0; i < P_L2_SIZE; i++) {
264 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
265 continue;
268 valid_ptr = i;
269 valid++;
270 if (p[i].skip) {
271 phys_page_compact(&p[i], nodes, compacted);
275 /* We can only compress if there's only one child. */
276 if (valid != 1) {
277 return;
280 assert(valid_ptr < P_L2_SIZE);
282 /* Don't compress if it won't fit in the # of bits we have. */
283 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
284 return;
287 lp->ptr = p[valid_ptr].ptr;
288 if (!p[valid_ptr].skip) {
289 /* If our only child is a leaf, make this a leaf. */
290 /* By design, we should have made this node a leaf to begin with so we
291 * should never reach here.
292 * But since it's so simple to handle this, let's do it just in case we
293 * change this rule.
295 lp->skip = 0;
296 } else {
297 lp->skip += p[valid_ptr].skip;
301 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
303 DECLARE_BITMAP(compacted, nodes_nb);
305 if (d->phys_map.skip) {
306 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
310 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
311 Node *nodes, MemoryRegionSection *sections)
313 PhysPageEntry *p;
314 hwaddr index = addr >> TARGET_PAGE_BITS;
315 int i;
317 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
318 if (lp.ptr == PHYS_MAP_NODE_NIL) {
319 return &sections[PHYS_SECTION_UNASSIGNED];
321 p = nodes[lp.ptr];
322 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
325 if (sections[lp.ptr].size.hi ||
326 range_covers_byte(sections[lp.ptr].offset_within_address_space,
327 sections[lp.ptr].size.lo, addr)) {
328 return &sections[lp.ptr];
329 } else {
330 return &sections[PHYS_SECTION_UNASSIGNED];
334 bool memory_region_is_unassigned(MemoryRegion *mr)
336 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
337 && mr != &io_mem_watch;
340 /* Called from RCU critical section */
341 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
342 hwaddr addr,
343 bool resolve_subpage)
345 MemoryRegionSection *section;
346 subpage_t *subpage;
348 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
349 if (resolve_subpage && section->mr->subpage) {
350 subpage = container_of(section->mr, subpage_t, iomem);
351 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
353 return section;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *
358 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
359 hwaddr *plen, bool resolve_subpage)
361 MemoryRegionSection *section;
362 MemoryRegion *mr;
363 Int128 diff;
365 section = address_space_lookup_region(d, addr, resolve_subpage);
366 /* Compute offset within MemoryRegionSection */
367 addr -= section->offset_within_address_space;
369 /* Compute offset within MemoryRegion */
370 *xlat = addr + section->offset_within_region;
372 mr = section->mr;
374 /* MMIO registers can be expected to perform full-width accesses based only
375 * on their address, without considering adjacent registers that could
376 * decode to completely different MemoryRegions. When such registers
377 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
378 * regions overlap wildly. For this reason we cannot clamp the accesses
379 * here.
381 * If the length is small (as is the case for address_space_ldl/stl),
382 * everything works fine. If the incoming length is large, however,
383 * the caller really has to do the clamping through memory_access_size.
385 if (memory_region_is_ram(mr)) {
386 diff = int128_sub(section->size, int128_make64(addr));
387 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
389 return section;
392 /* Called from RCU critical section */
393 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
394 hwaddr *xlat, hwaddr *plen,
395 bool is_write)
397 IOMMUTLBEntry iotlb;
398 MemoryRegionSection *section;
399 MemoryRegion *mr;
401 for (;;) {
402 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
403 section = address_space_translate_internal(d, addr, &addr, plen, true);
404 mr = section->mr;
406 if (!mr->iommu_ops) {
407 break;
410 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
411 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
412 | (addr & iotlb.addr_mask));
413 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
414 if (!(iotlb.perm & (1 << is_write))) {
415 mr = &io_mem_unassigned;
416 break;
419 as = iotlb.target_as;
422 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
423 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
424 *plen = MIN(page, *plen);
427 *xlat = addr;
428 return mr;
431 /* Called from RCU critical section */
432 MemoryRegionSection *
433 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
434 hwaddr *xlat, hwaddr *plen)
436 MemoryRegionSection *section;
437 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
439 section = address_space_translate_internal(d, addr, xlat, plen, false);
441 assert(!section->mr->iommu_ops);
442 return section;
444 #endif
446 #if !defined(CONFIG_USER_ONLY)
448 static int cpu_common_post_load(void *opaque, int version_id)
450 CPUState *cpu = opaque;
452 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
453 version_id is increased. */
454 cpu->interrupt_request &= ~0x01;
455 tlb_flush(cpu, 1);
457 return 0;
460 static int cpu_common_pre_load(void *opaque)
462 CPUState *cpu = opaque;
464 cpu->exception_index = -1;
466 return 0;
469 static bool cpu_common_exception_index_needed(void *opaque)
471 CPUState *cpu = opaque;
473 return tcg_enabled() && cpu->exception_index != -1;
476 static const VMStateDescription vmstate_cpu_common_exception_index = {
477 .name = "cpu_common/exception_index",
478 .version_id = 1,
479 .minimum_version_id = 1,
480 .needed = cpu_common_exception_index_needed,
481 .fields = (VMStateField[]) {
482 VMSTATE_INT32(exception_index, CPUState),
483 VMSTATE_END_OF_LIST()
487 static bool cpu_common_crash_occurred_needed(void *opaque)
489 CPUState *cpu = opaque;
491 return cpu->crash_occurred;
494 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
495 .name = "cpu_common/crash_occurred",
496 .version_id = 1,
497 .minimum_version_id = 1,
498 .needed = cpu_common_crash_occurred_needed,
499 .fields = (VMStateField[]) {
500 VMSTATE_BOOL(crash_occurred, CPUState),
501 VMSTATE_END_OF_LIST()
505 const VMStateDescription vmstate_cpu_common = {
506 .name = "cpu_common",
507 .version_id = 1,
508 .minimum_version_id = 1,
509 .pre_load = cpu_common_pre_load,
510 .post_load = cpu_common_post_load,
511 .fields = (VMStateField[]) {
512 VMSTATE_UINT32(halted, CPUState),
513 VMSTATE_UINT32(interrupt_request, CPUState),
514 VMSTATE_END_OF_LIST()
516 .subsections = (const VMStateDescription*[]) {
517 &vmstate_cpu_common_exception_index,
518 &vmstate_cpu_common_crash_occurred,
519 NULL
523 #endif
525 CPUState *qemu_get_cpu(int index)
527 CPUState *cpu;
529 CPU_FOREACH(cpu) {
530 if (cpu->cpu_index == index) {
531 return cpu;
535 return NULL;
538 #if !defined(CONFIG_USER_ONLY)
539 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
541 CPUAddressSpace *newas;
543 /* Target code should have set num_ases before calling us */
544 assert(asidx < cpu->num_ases);
546 if (asidx == 0) {
547 /* address space 0 gets the convenience alias */
548 cpu->as = as;
551 /* KVM cannot currently support multiple address spaces. */
552 assert(asidx == 0 || !kvm_enabled());
554 if (!cpu->cpu_ases) {
555 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
558 newas = &cpu->cpu_ases[asidx];
559 newas->cpu = cpu;
560 newas->as = as;
561 if (tcg_enabled()) {
562 newas->tcg_as_listener.commit = tcg_commit;
563 memory_listener_register(&newas->tcg_as_listener, as);
567 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
569 /* Return the AddressSpace corresponding to the specified index */
570 return cpu->cpu_ases[asidx].as;
572 #endif
574 #ifndef CONFIG_USER_ONLY
575 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
577 static int cpu_get_free_index(Error **errp)
579 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
581 if (cpu >= MAX_CPUMASK_BITS) {
582 error_setg(errp, "Trying to use more CPUs than max of %d",
583 MAX_CPUMASK_BITS);
584 return -1;
587 bitmap_set(cpu_index_map, cpu, 1);
588 return cpu;
591 void cpu_exec_exit(CPUState *cpu)
593 if (cpu->cpu_index == -1) {
594 /* cpu_index was never allocated by this @cpu or was already freed. */
595 return;
598 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
599 cpu->cpu_index = -1;
601 #else
603 static int cpu_get_free_index(Error **errp)
605 CPUState *some_cpu;
606 int cpu_index = 0;
608 CPU_FOREACH(some_cpu) {
609 cpu_index++;
611 return cpu_index;
614 void cpu_exec_exit(CPUState *cpu)
617 #endif
619 void cpu_exec_init(CPUState *cpu, Error **errp)
621 CPUClass *cc = CPU_GET_CLASS(cpu);
622 int cpu_index;
623 Error *local_err = NULL;
625 cpu->as = NULL;
626 cpu->num_ases = 0;
628 #ifndef CONFIG_USER_ONLY
629 cpu->thread_id = qemu_get_thread_id();
631 /* This is a softmmu CPU object, so create a property for it
632 * so users can wire up its memory. (This can't go in qom/cpu.c
633 * because that file is compiled only once for both user-mode
634 * and system builds.) The default if no link is set up is to use
635 * the system address space.
637 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
638 (Object **)&cpu->memory,
639 qdev_prop_allow_set_link_before_realize,
640 OBJ_PROP_LINK_UNREF_ON_RELEASE,
641 &error_abort);
642 cpu->memory = system_memory;
643 object_ref(OBJECT(cpu->memory));
644 #endif
646 #if defined(CONFIG_USER_ONLY)
647 cpu_list_lock();
648 #endif
649 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
650 if (local_err) {
651 error_propagate(errp, local_err);
652 #if defined(CONFIG_USER_ONLY)
653 cpu_list_unlock();
654 #endif
655 return;
657 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
658 #if defined(CONFIG_USER_ONLY)
659 cpu_list_unlock();
660 #endif
661 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
662 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
664 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
665 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
666 cpu_save, cpu_load, cpu->env_ptr);
667 assert(cc->vmsd == NULL);
668 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
669 #endif
670 if (cc->vmsd != NULL) {
671 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
675 #if defined(CONFIG_USER_ONLY)
676 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
678 tb_invalidate_phys_page_range(pc, pc + 1, 0);
680 #else
681 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
683 MemTxAttrs attrs;
684 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
685 int asidx = cpu_asidx_from_attrs(cpu, attrs);
686 if (phys != -1) {
687 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
688 phys | (pc & ~TARGET_PAGE_MASK));
691 #endif
693 #if defined(CONFIG_USER_ONLY)
694 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
699 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
700 int flags)
702 return -ENOSYS;
705 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
709 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
710 int flags, CPUWatchpoint **watchpoint)
712 return -ENOSYS;
714 #else
715 /* Add a watchpoint. */
716 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
717 int flags, CPUWatchpoint **watchpoint)
719 CPUWatchpoint *wp;
721 /* forbid ranges which are empty or run off the end of the address space */
722 if (len == 0 || (addr + len - 1) < addr) {
723 error_report("tried to set invalid watchpoint at %"
724 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
725 return -EINVAL;
727 wp = g_malloc(sizeof(*wp));
729 wp->vaddr = addr;
730 wp->len = len;
731 wp->flags = flags;
733 /* keep all GDB-injected watchpoints in front */
734 if (flags & BP_GDB) {
735 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
736 } else {
737 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
740 tlb_flush_page(cpu, addr);
742 if (watchpoint)
743 *watchpoint = wp;
744 return 0;
747 /* Remove a specific watchpoint. */
748 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
749 int flags)
751 CPUWatchpoint *wp;
753 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
754 if (addr == wp->vaddr && len == wp->len
755 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
756 cpu_watchpoint_remove_by_ref(cpu, wp);
757 return 0;
760 return -ENOENT;
763 /* Remove a specific watchpoint by reference. */
764 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
766 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
768 tlb_flush_page(cpu, watchpoint->vaddr);
770 g_free(watchpoint);
773 /* Remove all matching watchpoints. */
774 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
776 CPUWatchpoint *wp, *next;
778 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
779 if (wp->flags & mask) {
780 cpu_watchpoint_remove_by_ref(cpu, wp);
785 /* Return true if this watchpoint address matches the specified
786 * access (ie the address range covered by the watchpoint overlaps
787 * partially or completely with the address range covered by the
788 * access).
790 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
791 vaddr addr,
792 vaddr len)
794 /* We know the lengths are non-zero, but a little caution is
795 * required to avoid errors in the case where the range ends
796 * exactly at the top of the address space and so addr + len
797 * wraps round to zero.
799 vaddr wpend = wp->vaddr + wp->len - 1;
800 vaddr addrend = addr + len - 1;
802 return !(addr > wpend || wp->vaddr > addrend);
805 #endif
807 /* Add a breakpoint. */
808 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
809 CPUBreakpoint **breakpoint)
811 CPUBreakpoint *bp;
813 bp = g_malloc(sizeof(*bp));
815 bp->pc = pc;
816 bp->flags = flags;
818 /* keep all GDB-injected breakpoints in front */
819 if (flags & BP_GDB) {
820 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
821 } else {
822 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
825 breakpoint_invalidate(cpu, pc);
827 if (breakpoint) {
828 *breakpoint = bp;
830 return 0;
833 /* Remove a specific breakpoint. */
834 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
836 CPUBreakpoint *bp;
838 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
839 if (bp->pc == pc && bp->flags == flags) {
840 cpu_breakpoint_remove_by_ref(cpu, bp);
841 return 0;
844 return -ENOENT;
847 /* Remove a specific breakpoint by reference. */
848 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
850 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
852 breakpoint_invalidate(cpu, breakpoint->pc);
854 g_free(breakpoint);
857 /* Remove all matching breakpoints. */
858 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
860 CPUBreakpoint *bp, *next;
862 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
863 if (bp->flags & mask) {
864 cpu_breakpoint_remove_by_ref(cpu, bp);
869 /* enable or disable single step mode. EXCP_DEBUG is returned by the
870 CPU loop after each instruction */
871 void cpu_single_step(CPUState *cpu, int enabled)
873 if (cpu->singlestep_enabled != enabled) {
874 cpu->singlestep_enabled = enabled;
875 if (kvm_enabled()) {
876 kvm_update_guest_debug(cpu, 0);
877 } else {
878 /* must flush all the translated code to avoid inconsistencies */
879 /* XXX: only flush what is necessary */
880 tb_flush(cpu);
885 void cpu_abort(CPUState *cpu, const char *fmt, ...)
887 va_list ap;
888 va_list ap2;
890 va_start(ap, fmt);
891 va_copy(ap2, ap);
892 fprintf(stderr, "qemu: fatal: ");
893 vfprintf(stderr, fmt, ap);
894 fprintf(stderr, "\n");
895 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
896 if (qemu_log_separate()) {
897 qemu_log("qemu: fatal: ");
898 qemu_log_vprintf(fmt, ap2);
899 qemu_log("\n");
900 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
901 qemu_log_flush();
902 qemu_log_close();
904 va_end(ap2);
905 va_end(ap);
906 replay_finish();
907 #if defined(CONFIG_USER_ONLY)
909 struct sigaction act;
910 sigfillset(&act.sa_mask);
911 act.sa_handler = SIG_DFL;
912 sigaction(SIGABRT, &act, NULL);
914 #endif
915 abort();
918 #if !defined(CONFIG_USER_ONLY)
919 /* Called from RCU critical section */
920 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
922 RAMBlock *block;
924 block = atomic_rcu_read(&ram_list.mru_block);
925 if (block && addr - block->offset < block->max_length) {
926 return block;
928 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
929 if (addr - block->offset < block->max_length) {
930 goto found;
934 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
935 abort();
937 found:
938 /* It is safe to write mru_block outside the iothread lock. This
939 * is what happens:
941 * mru_block = xxx
942 * rcu_read_unlock()
943 * xxx removed from list
944 * rcu_read_lock()
945 * read mru_block
946 * mru_block = NULL;
947 * call_rcu(reclaim_ramblock, xxx);
948 * rcu_read_unlock()
950 * atomic_rcu_set is not needed here. The block was already published
951 * when it was placed into the list. Here we're just making an extra
952 * copy of the pointer.
954 ram_list.mru_block = block;
955 return block;
958 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
960 CPUState *cpu;
961 ram_addr_t start1;
962 RAMBlock *block;
963 ram_addr_t end;
965 end = TARGET_PAGE_ALIGN(start + length);
966 start &= TARGET_PAGE_MASK;
968 rcu_read_lock();
969 block = qemu_get_ram_block(start);
970 assert(block == qemu_get_ram_block(end - 1));
971 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
972 CPU_FOREACH(cpu) {
973 tlb_reset_dirty(cpu, start1, length);
975 rcu_read_unlock();
978 /* Note: start and end must be within the same ram block. */
979 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
980 ram_addr_t length,
981 unsigned client)
983 DirtyMemoryBlocks *blocks;
984 unsigned long end, page;
985 bool dirty = false;
987 if (length == 0) {
988 return false;
991 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
992 page = start >> TARGET_PAGE_BITS;
994 rcu_read_lock();
996 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
998 while (page < end) {
999 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1000 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1001 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1003 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1004 offset, num);
1005 page += num;
1008 rcu_read_unlock();
1010 if (dirty && tcg_enabled()) {
1011 tlb_reset_dirty_range_all(start, length);
1014 return dirty;
1017 /* Called from RCU critical section */
1018 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1019 MemoryRegionSection *section,
1020 target_ulong vaddr,
1021 hwaddr paddr, hwaddr xlat,
1022 int prot,
1023 target_ulong *address)
1025 hwaddr iotlb;
1026 CPUWatchpoint *wp;
1028 if (memory_region_is_ram(section->mr)) {
1029 /* Normal RAM. */
1030 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1031 + xlat;
1032 if (!section->readonly) {
1033 iotlb |= PHYS_SECTION_NOTDIRTY;
1034 } else {
1035 iotlb |= PHYS_SECTION_ROM;
1037 } else {
1038 AddressSpaceDispatch *d;
1040 d = atomic_rcu_read(&section->address_space->dispatch);
1041 iotlb = section - d->map.sections;
1042 iotlb += xlat;
1045 /* Make accesses to pages with watchpoints go via the
1046 watchpoint trap routines. */
1047 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1048 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1049 /* Avoid trapping reads of pages with a write breakpoint. */
1050 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1051 iotlb = PHYS_SECTION_WATCH + paddr;
1052 *address |= TLB_MMIO;
1053 break;
1058 return iotlb;
1060 #endif /* defined(CONFIG_USER_ONLY) */
1062 #if !defined(CONFIG_USER_ONLY)
1064 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1065 uint16_t section);
1066 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1068 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1069 qemu_anon_ram_alloc;
1072 * Set a custom physical guest memory alloator.
1073 * Accelerators with unusual needs may need this. Hopefully, we can
1074 * get rid of it eventually.
1076 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1078 phys_mem_alloc = alloc;
1081 static uint16_t phys_section_add(PhysPageMap *map,
1082 MemoryRegionSection *section)
1084 /* The physical section number is ORed with a page-aligned
1085 * pointer to produce the iotlb entries. Thus it should
1086 * never overflow into the page-aligned value.
1088 assert(map->sections_nb < TARGET_PAGE_SIZE);
1090 if (map->sections_nb == map->sections_nb_alloc) {
1091 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1092 map->sections = g_renew(MemoryRegionSection, map->sections,
1093 map->sections_nb_alloc);
1095 map->sections[map->sections_nb] = *section;
1096 memory_region_ref(section->mr);
1097 return map->sections_nb++;
1100 static void phys_section_destroy(MemoryRegion *mr)
1102 bool have_sub_page = mr->subpage;
1104 memory_region_unref(mr);
1106 if (have_sub_page) {
1107 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1108 object_unref(OBJECT(&subpage->iomem));
1109 g_free(subpage);
1113 static void phys_sections_free(PhysPageMap *map)
1115 while (map->sections_nb > 0) {
1116 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1117 phys_section_destroy(section->mr);
1119 g_free(map->sections);
1120 g_free(map->nodes);
1123 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1125 subpage_t *subpage;
1126 hwaddr base = section->offset_within_address_space
1127 & TARGET_PAGE_MASK;
1128 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1129 d->map.nodes, d->map.sections);
1130 MemoryRegionSection subsection = {
1131 .offset_within_address_space = base,
1132 .size = int128_make64(TARGET_PAGE_SIZE),
1134 hwaddr start, end;
1136 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1138 if (!(existing->mr->subpage)) {
1139 subpage = subpage_init(d->as, base);
1140 subsection.address_space = d->as;
1141 subsection.mr = &subpage->iomem;
1142 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1143 phys_section_add(&d->map, &subsection));
1144 } else {
1145 subpage = container_of(existing->mr, subpage_t, iomem);
1147 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1148 end = start + int128_get64(section->size) - 1;
1149 subpage_register(subpage, start, end,
1150 phys_section_add(&d->map, section));
1154 static void register_multipage(AddressSpaceDispatch *d,
1155 MemoryRegionSection *section)
1157 hwaddr start_addr = section->offset_within_address_space;
1158 uint16_t section_index = phys_section_add(&d->map, section);
1159 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1160 TARGET_PAGE_BITS));
1162 assert(num_pages);
1163 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1166 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1168 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1169 AddressSpaceDispatch *d = as->next_dispatch;
1170 MemoryRegionSection now = *section, remain = *section;
1171 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1173 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1174 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1175 - now.offset_within_address_space;
1177 now.size = int128_min(int128_make64(left), now.size);
1178 register_subpage(d, &now);
1179 } else {
1180 now.size = int128_zero();
1182 while (int128_ne(remain.size, now.size)) {
1183 remain.size = int128_sub(remain.size, now.size);
1184 remain.offset_within_address_space += int128_get64(now.size);
1185 remain.offset_within_region += int128_get64(now.size);
1186 now = remain;
1187 if (int128_lt(remain.size, page_size)) {
1188 register_subpage(d, &now);
1189 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1190 now.size = page_size;
1191 register_subpage(d, &now);
1192 } else {
1193 now.size = int128_and(now.size, int128_neg(page_size));
1194 register_multipage(d, &now);
/* Flush the coalesced MMIO buffer; a no-op unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1205 void qemu_mutex_lock_ramlist(void)
1207 qemu_mutex_lock(&ram_list.mutex);
1210 void qemu_mutex_unlock_ramlist(void)
1212 qemu_mutex_unlock(&ram_list.mutex);
1215 #ifdef __linux__
1217 #include <sys/vfs.h>
1219 #define HUGETLBFS_MAGIC 0x958458f6
1221 static long gethugepagesize(const char *path, Error **errp)
1223 struct statfs fs;
1224 int ret;
1226 do {
1227 ret = statfs(path, &fs);
1228 } while (ret != 0 && errno == EINTR);
1230 if (ret != 0) {
1231 error_setg_errno(errp, errno, "failed to get page size of file %s",
1232 path);
1233 return 0;
1236 return fs.f_bsize;
1239 static void *file_ram_alloc(RAMBlock *block,
1240 ram_addr_t memory,
1241 const char *path,
1242 Error **errp)
1244 struct stat st;
1245 char *filename;
1246 char *sanitized_name;
1247 char *c;
1248 void *area;
1249 int fd;
1250 uint64_t hpagesize;
1251 Error *local_err = NULL;
1253 hpagesize = gethugepagesize(path, &local_err);
1254 if (local_err) {
1255 error_propagate(errp, local_err);
1256 goto error;
1258 block->mr->align = hpagesize;
1260 if (memory < hpagesize) {
1261 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1262 "or larger than huge page size 0x%" PRIx64,
1263 memory, hpagesize);
1264 goto error;
1267 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1268 error_setg(errp,
1269 "host lacks kvm mmu notifiers, -mem-path unsupported");
1270 goto error;
1273 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1274 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1275 sanitized_name = g_strdup(memory_region_name(block->mr));
1276 for (c = sanitized_name; *c != '\0'; c++) {
1277 if (*c == '/') {
1278 *c = '_';
1282 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1283 sanitized_name);
1284 g_free(sanitized_name);
1286 fd = mkstemp(filename);
1287 if (fd >= 0) {
1288 unlink(filename);
1290 g_free(filename);
1291 } else {
1292 fd = open(path, O_RDWR | O_CREAT, 0644);
1295 if (fd < 0) {
1296 error_setg_errno(errp, errno,
1297 "unable to create backing store for hugepages");
1298 goto error;
1301 memory = ROUND_UP(memory, hpagesize);
1304 * ftruncate is not supported by hugetlbfs in older
1305 * hosts, so don't bother bailing out on errors.
1306 * If anything goes wrong with it under other filesystems,
1307 * mmap will fail.
1309 if (ftruncate(fd, memory)) {
1310 perror("ftruncate");
1313 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1314 if (area == MAP_FAILED) {
1315 error_setg_errno(errp, errno,
1316 "unable to map backing store for hugepages");
1317 close(fd);
1318 goto error;
1321 if (mem_prealloc) {
1322 os_mem_prealloc(fd, area, memory);
1325 block->fd = fd;
1326 return area;
1328 error:
1329 return NULL;
1331 #endif
1333 /* Called with the ramlist lock held. */
1334 static ram_addr_t find_ram_offset(ram_addr_t size)
1336 RAMBlock *block, *next_block;
1337 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1339 assert(size != 0); /* it would hand out same offset multiple times */
1341 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1342 return 0;
1345 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1346 ram_addr_t end, next = RAM_ADDR_MAX;
1348 end = block->offset + block->max_length;
1350 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1351 if (next_block->offset >= end) {
1352 next = MIN(next, next_block->offset);
1355 if (next - end >= size && next - end < mingap) {
1356 offset = end;
1357 mingap = next - end;
1361 if (offset == RAM_ADDR_MAX) {
1362 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1363 (uint64_t)size);
1364 abort();
1367 return offset;
1370 ram_addr_t last_ram_offset(void)
1372 RAMBlock *block;
1373 ram_addr_t last = 0;
1375 rcu_read_lock();
1376 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1377 last = MAX(last, block->offset + block->max_length);
1379 rcu_read_unlock();
1380 return last;
1383 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1385 int ret;
1387 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1388 if (!machine_dump_guest_core(current_machine)) {
1389 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1390 if (ret) {
1391 perror("qemu_madvise");
1392 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1393 "but dump_guest_core=off specified\n");
1398 /* Called within an RCU critical section, or while the ramlist lock
1399 * is held.
1401 static RAMBlock *find_ram_block(ram_addr_t addr)
1403 RAMBlock *block;
1405 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1406 if (block->offset == addr) {
1407 return block;
1411 return NULL;
1414 const char *qemu_ram_get_idstr(RAMBlock *rb)
1416 return rb->idstr;
1419 /* Called with iothread lock held. */
1420 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1422 RAMBlock *new_block, *block;
1424 rcu_read_lock();
1425 new_block = find_ram_block(addr);
1426 assert(new_block);
1427 assert(!new_block->idstr[0]);
1429 if (dev) {
1430 char *id = qdev_get_dev_path(dev);
1431 if (id) {
1432 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1433 g_free(id);
1436 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1438 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1439 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1440 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1441 new_block->idstr);
1442 abort();
1445 rcu_read_unlock();
1448 /* Called with iothread lock held. */
1449 void qemu_ram_unset_idstr(ram_addr_t addr)
1451 RAMBlock *block;
1453 /* FIXME: arch_init.c assumes that this is not called throughout
1454 * migration. Ignore the problem since hot-unplug during migration
1455 * does not work anyway.
1458 rcu_read_lock();
1459 block = find_ram_block(addr);
1460 if (block) {
1461 memset(block->idstr, 0, sizeof(block->idstr));
1463 rcu_read_unlock();
1466 static int memory_try_enable_merging(void *addr, size_t len)
1468 if (!machine_mem_merge(current_machine)) {
1469 /* disabled by the user */
1470 return 0;
1473 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1476 /* Only legal before guest might have detected the memory size: e.g. on
1477 * incoming migration, or right after reset.
1479 * As memory core doesn't know how is memory accessed, it is up to
1480 * resize callback to update device state and/or add assertions to detect
1481 * misuse, if necessary.
1483 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1485 RAMBlock *block = find_ram_block(base);
1487 assert(block);
1489 newsize = HOST_PAGE_ALIGN(newsize);
1491 if (block->used_length == newsize) {
1492 return 0;
1495 if (!(block->flags & RAM_RESIZEABLE)) {
1496 error_setg_errno(errp, EINVAL,
1497 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1498 " in != 0x" RAM_ADDR_FMT, block->idstr,
1499 newsize, block->used_length);
1500 return -EINVAL;
1503 if (block->max_length < newsize) {
1504 error_setg_errno(errp, EINVAL,
1505 "Length too large: %s: 0x" RAM_ADDR_FMT
1506 " > 0x" RAM_ADDR_FMT, block->idstr,
1507 newsize, block->max_length);
1508 return -EINVAL;
1511 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1512 block->used_length = newsize;
1513 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1514 DIRTY_CLIENTS_ALL);
1515 memory_region_set_size(block->mr, newsize);
1516 if (block->resized) {
1517 block->resized(block->idstr, newsize, block->host);
1519 return 0;
1522 /* Called with ram_list.mutex held */
1523 static void dirty_memory_extend(ram_addr_t old_ram_size,
1524 ram_addr_t new_ram_size)
1526 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1527 DIRTY_MEMORY_BLOCK_SIZE);
1528 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1529 DIRTY_MEMORY_BLOCK_SIZE);
1530 int i;
1532 /* Only need to extend if block count increased */
1533 if (new_num_blocks <= old_num_blocks) {
1534 return;
1537 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1538 DirtyMemoryBlocks *old_blocks;
1539 DirtyMemoryBlocks *new_blocks;
1540 int j;
1542 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1543 new_blocks = g_malloc(sizeof(*new_blocks) +
1544 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1546 if (old_num_blocks) {
1547 memcpy(new_blocks->blocks, old_blocks->blocks,
1548 old_num_blocks * sizeof(old_blocks->blocks[0]));
1551 for (j = old_num_blocks; j < new_num_blocks; j++) {
1552 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1555 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1557 if (old_blocks) {
1558 g_free_rcu(old_blocks, rcu);
1563 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1565 RAMBlock *block;
1566 RAMBlock *last_block = NULL;
1567 ram_addr_t old_ram_size, new_ram_size;
1568 Error *err = NULL;
1570 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1572 qemu_mutex_lock_ramlist();
1573 new_block->offset = find_ram_offset(new_block->max_length);
1575 if (!new_block->host) {
1576 if (xen_enabled()) {
1577 xen_ram_alloc(new_block->offset, new_block->max_length,
1578 new_block->mr, &err);
1579 if (err) {
1580 error_propagate(errp, err);
1581 qemu_mutex_unlock_ramlist();
1582 return -1;
1584 } else {
1585 new_block->host = phys_mem_alloc(new_block->max_length,
1586 &new_block->mr->align);
1587 if (!new_block->host) {
1588 error_setg_errno(errp, errno,
1589 "cannot set up guest memory '%s'",
1590 memory_region_name(new_block->mr));
1591 qemu_mutex_unlock_ramlist();
1592 return -1;
1594 memory_try_enable_merging(new_block->host, new_block->max_length);
1598 new_ram_size = MAX(old_ram_size,
1599 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1600 if (new_ram_size > old_ram_size) {
1601 migration_bitmap_extend(old_ram_size, new_ram_size);
1602 dirty_memory_extend(old_ram_size, new_ram_size);
1604 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1605 * QLIST (which has an RCU-friendly variant) does not have insertion at
1606 * tail, so save the last element in last_block.
1608 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1609 last_block = block;
1610 if (block->max_length < new_block->max_length) {
1611 break;
1614 if (block) {
1615 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1616 } else if (last_block) {
1617 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1618 } else { /* list is empty */
1619 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1621 ram_list.mru_block = NULL;
1623 /* Write list before version */
1624 smp_wmb();
1625 ram_list.version++;
1626 qemu_mutex_unlock_ramlist();
1628 cpu_physical_memory_set_dirty_range(new_block->offset,
1629 new_block->used_length,
1630 DIRTY_CLIENTS_ALL);
1632 if (new_block->host) {
1633 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1634 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1635 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1636 if (kvm_enabled()) {
1637 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1641 return new_block->offset;
1644 #ifdef __linux__
1645 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1646 bool share, const char *mem_path,
1647 Error **errp)
1649 RAMBlock *new_block;
1650 ram_addr_t addr;
1651 Error *local_err = NULL;
1653 if (xen_enabled()) {
1654 error_setg(errp, "-mem-path not supported with Xen");
1655 return -1;
1658 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1660 * file_ram_alloc() needs to allocate just like
1661 * phys_mem_alloc, but we haven't bothered to provide
1662 * a hook there.
1664 error_setg(errp,
1665 "-mem-path not supported with this accelerator");
1666 return -1;
1669 size = HOST_PAGE_ALIGN(size);
1670 new_block = g_malloc0(sizeof(*new_block));
1671 new_block->mr = mr;
1672 new_block->used_length = size;
1673 new_block->max_length = size;
1674 new_block->flags = share ? RAM_SHARED : 0;
1675 new_block->host = file_ram_alloc(new_block, size,
1676 mem_path, errp);
1677 if (!new_block->host) {
1678 g_free(new_block);
1679 return -1;
1682 addr = ram_block_add(new_block, &local_err);
1683 if (local_err) {
1684 g_free(new_block);
1685 error_propagate(errp, local_err);
1686 return -1;
1688 return addr;
1690 #endif
1692 static
1693 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1694 void (*resized)(const char*,
1695 uint64_t length,
1696 void *host),
1697 void *host, bool resizeable,
1698 MemoryRegion *mr, Error **errp)
1700 RAMBlock *new_block;
1701 ram_addr_t addr;
1702 Error *local_err = NULL;
1704 size = HOST_PAGE_ALIGN(size);
1705 max_size = HOST_PAGE_ALIGN(max_size);
1706 new_block = g_malloc0(sizeof(*new_block));
1707 new_block->mr = mr;
1708 new_block->resized = resized;
1709 new_block->used_length = size;
1710 new_block->max_length = max_size;
1711 assert(max_size >= size);
1712 new_block->fd = -1;
1713 new_block->host = host;
1714 if (host) {
1715 new_block->flags |= RAM_PREALLOC;
1717 if (resizeable) {
1718 new_block->flags |= RAM_RESIZEABLE;
1720 addr = ram_block_add(new_block, &local_err);
1721 if (local_err) {
1722 g_free(new_block);
1723 error_propagate(errp, local_err);
1724 return -1;
1726 return addr;
1729 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1730 MemoryRegion *mr, Error **errp)
1732 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1735 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1737 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1740 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1741 void (*resized)(const char*,
1742 uint64_t length,
1743 void *host),
1744 MemoryRegion *mr, Error **errp)
1746 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1749 static void reclaim_ramblock(RAMBlock *block)
1751 if (block->flags & RAM_PREALLOC) {
1753 } else if (xen_enabled()) {
1754 xen_invalidate_map_cache_entry(block->host);
1755 #ifndef _WIN32
1756 } else if (block->fd >= 0) {
1757 qemu_ram_munmap(block->host, block->max_length);
1758 close(block->fd);
1759 #endif
1760 } else {
1761 qemu_anon_ram_free(block->host, block->max_length);
1763 g_free(block);
1766 void qemu_ram_free(ram_addr_t addr)
1768 RAMBlock *block;
1770 qemu_mutex_lock_ramlist();
1771 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1772 if (addr == block->offset) {
1773 QLIST_REMOVE_RCU(block, next);
1774 ram_list.mru_block = NULL;
1775 /* Write list before version */
1776 smp_wmb();
1777 ram_list.version++;
1778 call_rcu(block, reclaim_ramblock, rcu);
1779 break;
1782 qemu_mutex_unlock_ramlist();
1785 #ifndef _WIN32
1786 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1788 RAMBlock *block;
1789 ram_addr_t offset;
1790 int flags;
1791 void *area, *vaddr;
1793 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1794 offset = addr - block->offset;
1795 if (offset < block->max_length) {
1796 vaddr = ramblock_ptr(block, offset);
1797 if (block->flags & RAM_PREALLOC) {
1799 } else if (xen_enabled()) {
1800 abort();
1801 } else {
1802 flags = MAP_FIXED;
1803 if (block->fd >= 0) {
1804 flags |= (block->flags & RAM_SHARED ?
1805 MAP_SHARED : MAP_PRIVATE);
1806 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1807 flags, block->fd, offset);
1808 } else {
1810 * Remap needs to match alloc. Accelerators that
1811 * set phys_mem_alloc never remap. If they did,
1812 * we'd need a remap hook here.
1814 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1816 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1817 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1818 flags, -1, 0);
1820 if (area != vaddr) {
1821 fprintf(stderr, "Could not remap addr: "
1822 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1823 length, addr);
1824 exit(1);
1826 memory_try_enable_merging(vaddr, length);
1827 qemu_ram_setup_dump(vaddr, length);
1832 #endif /* !_WIN32 */
1834 int qemu_get_ram_fd(ram_addr_t addr)
1836 RAMBlock *block;
1837 int fd;
1839 rcu_read_lock();
1840 block = qemu_get_ram_block(addr);
1841 fd = block->fd;
1842 rcu_read_unlock();
1843 return fd;
1846 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1848 RAMBlock *block;
1850 rcu_read_lock();
1851 block = qemu_get_ram_block(addr);
1852 block->fd = fd;
1853 rcu_read_unlock();
1856 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1858 RAMBlock *block;
1859 void *ptr;
1861 rcu_read_lock();
1862 block = qemu_get_ram_block(addr);
1863 ptr = ramblock_ptr(block, 0);
1864 rcu_read_unlock();
1865 return ptr;
1868 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1869 * This should not be used for general purpose DMA. Use address_space_map
1870 * or address_space_rw instead. For local memory (e.g. video ram) that the
1871 * device owns, use memory_region_get_ram_ptr.
1873 * Called within RCU critical section.
1875 void *qemu_get_ram_ptr(ram_addr_t addr)
1877 RAMBlock *block = qemu_get_ram_block(addr);
1879 if (xen_enabled() && block->host == NULL) {
1880 /* We need to check if the requested address is in the RAM
1881 * because we don't want to map the entire memory in QEMU.
1882 * In that case just map until the end of the page.
1884 if (block->offset == 0) {
1885 return xen_map_cache(addr, 0, 0);
1888 block->host = xen_map_cache(block->offset, block->max_length, 1);
1890 return ramblock_ptr(block, addr - block->offset);
1893 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1894 * but takes a size argument.
1896 * Called within RCU critical section.
1898 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1900 RAMBlock *block;
1901 ram_addr_t offset_inside_block;
1902 if (*size == 0) {
1903 return NULL;
1906 block = qemu_get_ram_block(addr);
1907 offset_inside_block = addr - block->offset;
1908 *size = MIN(*size, block->max_length - offset_inside_block);
1910 if (xen_enabled() && block->host == NULL) {
1911 /* We need to check if the requested address is in the RAM
1912 * because we don't want to map the entire memory in QEMU.
1913 * In that case just map the requested area.
1915 if (block->offset == 0) {
1916 return xen_map_cache(addr, *size, 1);
1919 block->host = xen_map_cache(block->offset, block->max_length, 1);
1922 return ramblock_ptr(block, offset_inside_block);
1926 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1927 * in that RAMBlock.
1929 * ptr: Host pointer to look up
1930 * round_offset: If true round the result offset down to a page boundary
1931 * *ram_addr: set to result ram_addr
1932 * *offset: set to result offset within the RAMBlock
1934 * Returns: RAMBlock (or NULL if not found)
1936 * By the time this function returns, the returned pointer is not protected
1937 * by RCU anymore. If the caller is not within an RCU critical section and
1938 * does not hold the iothread lock, it must have other means of protecting the
1939 * pointer, such as a reference to the region that includes the incoming
1940 * ram_addr_t.
1942 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1943 ram_addr_t *ram_addr,
1944 ram_addr_t *offset)
1946 RAMBlock *block;
1947 uint8_t *host = ptr;
1949 if (xen_enabled()) {
1950 rcu_read_lock();
1951 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1952 block = qemu_get_ram_block(*ram_addr);
1953 if (block) {
1954 *offset = (host - block->host);
1956 rcu_read_unlock();
1957 return block;
1960 rcu_read_lock();
1961 block = atomic_rcu_read(&ram_list.mru_block);
1962 if (block && block->host && host - block->host < block->max_length) {
1963 goto found;
1966 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1967 /* This case append when the block is not mapped. */
1968 if (block->host == NULL) {
1969 continue;
1971 if (host - block->host < block->max_length) {
1972 goto found;
1976 rcu_read_unlock();
1977 return NULL;
1979 found:
1980 *offset = (host - block->host);
1981 if (round_offset) {
1982 *offset &= TARGET_PAGE_MASK;
1984 *ram_addr = block->offset + *offset;
1985 rcu_read_unlock();
1986 return block;
1990 * Finds the named RAMBlock
1992 * name: The name of RAMBlock to find
1994 * Returns: RAMBlock (or NULL if not found)
1996 RAMBlock *qemu_ram_block_by_name(const char *name)
1998 RAMBlock *block;
2000 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2001 if (!strcmp(name, block->idstr)) {
2002 return block;
2006 return NULL;
2009 /* Some of the softmmu routines need to translate from a host pointer
2010 (typically a TLB entry) back to a ram offset. */
2011 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2013 RAMBlock *block;
2014 ram_addr_t offset; /* Not used */
2016 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
2018 if (!block) {
2019 return NULL;
2022 return block->mr;
2025 /* Called within RCU critical section. */
2026 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2027 uint64_t val, unsigned size)
2029 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2030 tb_invalidate_phys_page_fast(ram_addr, size);
2032 switch (size) {
2033 case 1:
2034 stb_p(qemu_get_ram_ptr(ram_addr), val);
2035 break;
2036 case 2:
2037 stw_p(qemu_get_ram_ptr(ram_addr), val);
2038 break;
2039 case 4:
2040 stl_p(qemu_get_ram_ptr(ram_addr), val);
2041 break;
2042 default:
2043 abort();
2045 /* Set both VGA and migration bits for simplicity and to remove
2046 * the notdirty callback faster.
2048 cpu_physical_memory_set_dirty_range(ram_addr, size,
2049 DIRTY_CLIENTS_NOCODE);
2050 /* we remove the notdirty callback only if the code has been
2051 flushed */
2052 if (!cpu_physical_memory_is_clean(ram_addr)) {
2053 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2057 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2058 unsigned size, bool is_write)
2060 return is_write;
2063 static const MemoryRegionOps notdirty_mem_ops = {
2064 .write = notdirty_mem_write,
2065 .valid.accepts = notdirty_mem_accepts,
2066 .endianness = DEVICE_NATIVE_ENDIAN,
2069 /* Generate a debug exception if a watchpoint has been hit. */
2070 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2072 CPUState *cpu = current_cpu;
2073 CPUArchState *env = cpu->env_ptr;
2074 target_ulong pc, cs_base;
2075 target_ulong vaddr;
2076 CPUWatchpoint *wp;
2077 int cpu_flags;
2079 if (cpu->watchpoint_hit) {
2080 /* We re-entered the check after replacing the TB. Now raise
2081 * the debug interrupt so that is will trigger after the
2082 * current instruction. */
2083 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2084 return;
2086 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2087 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2088 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2089 && (wp->flags & flags)) {
2090 if (flags == BP_MEM_READ) {
2091 wp->flags |= BP_WATCHPOINT_HIT_READ;
2092 } else {
2093 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2095 wp->hitaddr = vaddr;
2096 wp->hitattrs = attrs;
2097 if (!cpu->watchpoint_hit) {
2098 cpu->watchpoint_hit = wp;
2099 tb_check_watchpoint(cpu);
2100 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2101 cpu->exception_index = EXCP_DEBUG;
2102 cpu_loop_exit(cpu);
2103 } else {
2104 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2105 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2106 cpu_resume_from_signal(cpu, NULL);
2109 } else {
2110 wp->flags &= ~BP_WATCHPOINT_HIT;
2115 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2116 so these check for a hit then pass through to the normal out-of-line
2117 phys routines. */
2118 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2119 unsigned size, MemTxAttrs attrs)
2121 MemTxResult res;
2122 uint64_t data;
2123 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2124 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2126 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2127 switch (size) {
2128 case 1:
2129 data = address_space_ldub(as, addr, attrs, &res);
2130 break;
2131 case 2:
2132 data = address_space_lduw(as, addr, attrs, &res);
2133 break;
2134 case 4:
2135 data = address_space_ldl(as, addr, attrs, &res);
2136 break;
2137 default: abort();
2139 *pdata = data;
2140 return res;
2143 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2144 uint64_t val, unsigned size,
2145 MemTxAttrs attrs)
2147 MemTxResult res;
2148 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2149 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2151 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2152 switch (size) {
2153 case 1:
2154 address_space_stb(as, addr, val, attrs, &res);
2155 break;
2156 case 2:
2157 address_space_stw(as, addr, val, attrs, &res);
2158 break;
2159 case 4:
2160 address_space_stl(as, addr, val, attrs, &res);
2161 break;
2162 default: abort();
2164 return res;
2167 static const MemoryRegionOps watch_mem_ops = {
2168 .read_with_attrs = watch_mem_read,
2169 .write_with_attrs = watch_mem_write,
2170 .endianness = DEVICE_NATIVE_ENDIAN,
2173 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2174 unsigned len, MemTxAttrs attrs)
2176 subpage_t *subpage = opaque;
2177 uint8_t buf[8];
2178 MemTxResult res;
2180 #if defined(DEBUG_SUBPAGE)
2181 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2182 subpage, len, addr);
2183 #endif
2184 res = address_space_read(subpage->as, addr + subpage->base,
2185 attrs, buf, len);
2186 if (res) {
2187 return res;
2189 switch (len) {
2190 case 1:
2191 *data = ldub_p(buf);
2192 return MEMTX_OK;
2193 case 2:
2194 *data = lduw_p(buf);
2195 return MEMTX_OK;
2196 case 4:
2197 *data = ldl_p(buf);
2198 return MEMTX_OK;
2199 case 8:
2200 *data = ldq_p(buf);
2201 return MEMTX_OK;
2202 default:
2203 abort();
2207 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2208 uint64_t value, unsigned len, MemTxAttrs attrs)
2210 subpage_t *subpage = opaque;
2211 uint8_t buf[8];
2213 #if defined(DEBUG_SUBPAGE)
2214 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2215 " value %"PRIx64"\n",
2216 __func__, subpage, len, addr, value);
2217 #endif
2218 switch (len) {
2219 case 1:
2220 stb_p(buf, value);
2221 break;
2222 case 2:
2223 stw_p(buf, value);
2224 break;
2225 case 4:
2226 stl_p(buf, value);
2227 break;
2228 case 8:
2229 stq_p(buf, value);
2230 break;
2231 default:
2232 abort();
2234 return address_space_write(subpage->as, addr + subpage->base,
2235 attrs, buf, len);
2238 static bool subpage_accepts(void *opaque, hwaddr addr,
2239 unsigned len, bool is_write)
2241 subpage_t *subpage = opaque;
2242 #if defined(DEBUG_SUBPAGE)
2243 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2244 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2245 #endif
2247 return address_space_access_valid(subpage->as, addr + subpage->base,
2248 len, is_write);
2251 static const MemoryRegionOps subpage_ops = {
2252 .read_with_attrs = subpage_read,
2253 .write_with_attrs = subpage_write,
2254 .impl.min_access_size = 1,
2255 .impl.max_access_size = 8,
2256 .valid.min_access_size = 1,
2257 .valid.max_access_size = 8,
2258 .valid.accepts = subpage_accepts,
2259 .endianness = DEVICE_NATIVE_ENDIAN,
2262 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2263 uint16_t section)
2265 int idx, eidx;
2267 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2268 return -1;
2269 idx = SUBPAGE_IDX(start);
2270 eidx = SUBPAGE_IDX(end);
2271 #if defined(DEBUG_SUBPAGE)
2272 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2273 __func__, mmio, start, end, idx, eidx, section);
2274 #endif
2275 for (; idx <= eidx; idx++) {
2276 mmio->sub_section[idx] = section;
2279 return 0;
2282 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2284 subpage_t *mmio;
2286 mmio = g_malloc0(sizeof(subpage_t));
2288 mmio->as = as;
2289 mmio->base = base;
2290 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2291 NULL, TARGET_PAGE_SIZE);
2292 mmio->iomem.subpage = true;
2293 #if defined(DEBUG_SUBPAGE)
2294 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2295 mmio, base, TARGET_PAGE_SIZE);
2296 #endif
2297 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2299 return mmio;
2302 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2303 MemoryRegion *mr)
2305 assert(as);
2306 MemoryRegionSection section = {
2307 .address_space = as,
2308 .mr = mr,
2309 .offset_within_address_space = 0,
2310 .offset_within_region = 0,
2311 .size = int128_2_64(),
2314 return phys_section_add(map, &section);
2317 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2319 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2320 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2321 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2322 MemoryRegionSection *sections = d->map.sections;
2324 return sections[index & ~TARGET_PAGE_MASK].mr;
2327 static void io_mem_init(void)
2329 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2330 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2331 NULL, UINT64_MAX);
2332 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2333 NULL, UINT64_MAX);
2334 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2335 NULL, UINT64_MAX);
2338 static void mem_begin(MemoryListener *listener)
2340 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2341 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2342 uint16_t n;
2344 n = dummy_section(&d->map, as, &io_mem_unassigned);
2345 assert(n == PHYS_SECTION_UNASSIGNED);
2346 n = dummy_section(&d->map, as, &io_mem_notdirty);
2347 assert(n == PHYS_SECTION_NOTDIRTY);
2348 n = dummy_section(&d->map, as, &io_mem_rom);
2349 assert(n == PHYS_SECTION_ROM);
2350 n = dummy_section(&d->map, as, &io_mem_watch);
2351 assert(n == PHYS_SECTION_WATCH);
2353 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2354 d->as = as;
2355 as->next_dispatch = d;
2358 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2360 phys_sections_free(&d->map);
2361 g_free(d);
2364 static void mem_commit(MemoryListener *listener)
2366 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2367 AddressSpaceDispatch *cur = as->dispatch;
2368 AddressSpaceDispatch *next = as->next_dispatch;
2370 phys_page_compact_all(next, next->map.nodes_nb);
2372 atomic_rcu_set(&as->dispatch, next);
2373 if (cur) {
2374 call_rcu(cur, address_space_dispatch_free, rcu);
2378 static void tcg_commit(MemoryListener *listener)
2380 CPUAddressSpace *cpuas;
2381 AddressSpaceDispatch *d;
2383 /* since each CPU stores ram addresses in its TLB cache, we must
2384 reset the modified entries */
2385 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2386 cpu_reloading_memory_map();
2387 /* The CPU and TLB are protected by the iothread lock.
2388 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2389 * may have split the RCU critical section.
2391 d = atomic_rcu_read(&cpuas->as->dispatch);
2392 cpuas->memory_dispatch = d;
2393 tlb_flush(cpuas->cpu, 1);
2396 void address_space_init_dispatch(AddressSpace *as)
2398 as->dispatch = NULL;
2399 as->dispatch_listener = (MemoryListener) {
2400 .begin = mem_begin,
2401 .commit = mem_commit,
2402 .region_add = mem_add,
2403 .region_nop = mem_add,
2404 .priority = 0,
2406 memory_listener_register(&as->dispatch_listener, as);
2409 void address_space_unregister(AddressSpace *as)
2411 memory_listener_unregister(&as->dispatch_listener);
2414 void address_space_destroy_dispatch(AddressSpace *as)
2416 AddressSpaceDispatch *d = as->dispatch;
2418 atomic_rcu_set(&as->dispatch, NULL);
2419 if (d) {
2420 call_rcu(d, address_space_dispatch_free, rcu);
2424 static void memory_map_init(void)
2426 system_memory = g_malloc(sizeof(*system_memory));
2428 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2429 address_space_init(&address_space_memory, system_memory, "memory");
2431 system_io = g_malloc(sizeof(*system_io));
2432 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2433 65536);
2434 address_space_init(&address_space_io, system_io, "I/O");
2437 MemoryRegion *get_system_memory(void)
2439 return system_memory;
2442 MemoryRegion *get_system_io(void)
2444 return system_io;
2447 #endif /* !defined(CONFIG_USER_ONLY) */
2449 /* physical memory access (slow version, mainly for debug) */
2450 #if defined(CONFIG_USER_ONLY)
2451 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2452 uint8_t *buf, int len, int is_write)
2454 int l, flags;
2455 target_ulong page;
2456 void * p;
2458 while (len > 0) {
2459 page = addr & TARGET_PAGE_MASK;
2460 l = (page + TARGET_PAGE_SIZE) - addr;
2461 if (l > len)
2462 l = len;
2463 flags = page_get_flags(page);
2464 if (!(flags & PAGE_VALID))
2465 return -1;
2466 if (is_write) {
2467 if (!(flags & PAGE_WRITE))
2468 return -1;
2469 /* XXX: this code should not depend on lock_user */
2470 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2471 return -1;
2472 memcpy(p, buf, l);
2473 unlock_user(p, addr, l);
2474 } else {
2475 if (!(flags & PAGE_READ))
2476 return -1;
2477 /* XXX: this code should not depend on lock_user */
2478 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2479 return -1;
2480 memcpy(buf, p, l);
2481 unlock_user(p, addr, 0);
2483 len -= l;
2484 buf += l;
2485 addr += l;
2487 return 0;
2490 #else
2492 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2493 hwaddr length)
2495 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2496 /* No early return if dirty_log_mask is or becomes 0, because
2497 * cpu_physical_memory_set_dirty_range will still call
2498 * xen_modified_memory.
2500 if (dirty_log_mask) {
2501 dirty_log_mask =
2502 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2504 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2505 tb_invalidate_phys_range(addr, addr + length);
2506 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2508 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2511 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2513 unsigned access_size_max = mr->ops->valid.max_access_size;
2515 /* Regions are assumed to support 1-4 byte accesses unless
2516 otherwise specified. */
2517 if (access_size_max == 0) {
2518 access_size_max = 4;
2521 /* Bound the maximum access by the alignment of the address. */
2522 if (!mr->ops->impl.unaligned) {
2523 unsigned align_size_max = addr & -addr;
2524 if (align_size_max != 0 && align_size_max < access_size_max) {
2525 access_size_max = align_size_max;
2529 /* Don't attempt accesses larger than the maximum. */
2530 if (l > access_size_max) {
2531 l = access_size_max;
2533 l = pow2floor(l);
2535 return l;
2538 static bool prepare_mmio_access(MemoryRegion *mr)
2540 bool unlocked = !qemu_mutex_iothread_locked();
2541 bool release_lock = false;
2543 if (unlocked && mr->global_locking) {
2544 qemu_mutex_lock_iothread();
2545 unlocked = false;
2546 release_lock = true;
2548 if (mr->flush_coalesced_mmio) {
2549 if (unlocked) {
2550 qemu_mutex_lock_iothread();
2552 qemu_flush_coalesced_mmio_buffer();
2553 if (unlocked) {
2554 qemu_mutex_unlock_iothread();
2558 return release_lock;
2561 /* Called within RCU critical section. */
2562 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2563 MemTxAttrs attrs,
2564 const uint8_t *buf,
2565 int len, hwaddr addr1,
2566 hwaddr l, MemoryRegion *mr)
2568 uint8_t *ptr;
2569 uint64_t val;
2570 MemTxResult result = MEMTX_OK;
2571 bool release_lock = false;
2573 for (;;) {
2574 if (!memory_access_is_direct(mr, true)) {
2575 release_lock |= prepare_mmio_access(mr);
2576 l = memory_access_size(mr, l, addr1);
2577 /* XXX: could force current_cpu to NULL to avoid
2578 potential bugs */
2579 switch (l) {
2580 case 8:
2581 /* 64 bit write access */
2582 val = ldq_p(buf);
2583 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2584 attrs);
2585 break;
2586 case 4:
2587 /* 32 bit write access */
2588 val = ldl_p(buf);
2589 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2590 attrs);
2591 break;
2592 case 2:
2593 /* 16 bit write access */
2594 val = lduw_p(buf);
2595 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2596 attrs);
2597 break;
2598 case 1:
2599 /* 8 bit write access */
2600 val = ldub_p(buf);
2601 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2602 attrs);
2603 break;
2604 default:
2605 abort();
2607 } else {
2608 addr1 += memory_region_get_ram_addr(mr);
2609 /* RAM case */
2610 ptr = qemu_get_ram_ptr(addr1);
2611 memcpy(ptr, buf, l);
2612 invalidate_and_set_dirty(mr, addr1, l);
2615 if (release_lock) {
2616 qemu_mutex_unlock_iothread();
2617 release_lock = false;
2620 len -= l;
2621 buf += l;
2622 addr += l;
2624 if (!len) {
2625 break;
2628 l = len;
2629 mr = address_space_translate(as, addr, &addr1, &l, true);
2632 return result;
2635 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2636 const uint8_t *buf, int len)
2638 hwaddr l;
2639 hwaddr addr1;
2640 MemoryRegion *mr;
2641 MemTxResult result = MEMTX_OK;
2643 if (len > 0) {
2644 rcu_read_lock();
2645 l = len;
2646 mr = address_space_translate(as, addr, &addr1, &l, true);
2647 result = address_space_write_continue(as, addr, attrs, buf, len,
2648 addr1, l, mr);
2649 rcu_read_unlock();
2652 return result;
2655 /* Called within RCU critical section. */
2656 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2657 MemTxAttrs attrs, uint8_t *buf,
2658 int len, hwaddr addr1, hwaddr l,
2659 MemoryRegion *mr)
2661 uint8_t *ptr;
2662 uint64_t val;
2663 MemTxResult result = MEMTX_OK;
2664 bool release_lock = false;
2666 for (;;) {
2667 if (!memory_access_is_direct(mr, false)) {
2668 /* I/O case */
2669 release_lock |= prepare_mmio_access(mr);
2670 l = memory_access_size(mr, l, addr1);
2671 switch (l) {
2672 case 8:
2673 /* 64 bit read access */
2674 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2675 attrs);
2676 stq_p(buf, val);
2677 break;
2678 case 4:
2679 /* 32 bit read access */
2680 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2681 attrs);
2682 stl_p(buf, val);
2683 break;
2684 case 2:
2685 /* 16 bit read access */
2686 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2687 attrs);
2688 stw_p(buf, val);
2689 break;
2690 case 1:
2691 /* 8 bit read access */
2692 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2693 attrs);
2694 stb_p(buf, val);
2695 break;
2696 default:
2697 abort();
2699 } else {
2700 /* RAM case */
2701 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2702 memcpy(buf, ptr, l);
2705 if (release_lock) {
2706 qemu_mutex_unlock_iothread();
2707 release_lock = false;
2710 len -= l;
2711 buf += l;
2712 addr += l;
2714 if (!len) {
2715 break;
2718 l = len;
2719 mr = address_space_translate(as, addr, &addr1, &l, false);
2722 return result;
2725 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2726 MemTxAttrs attrs, uint8_t *buf, int len)
2728 hwaddr l;
2729 hwaddr addr1;
2730 MemoryRegion *mr;
2731 MemTxResult result = MEMTX_OK;
2733 if (len > 0) {
2734 rcu_read_lock();
2735 l = len;
2736 mr = address_space_translate(as, addr, &addr1, &l, false);
2737 result = address_space_read_continue(as, addr, attrs, buf, len,
2738 addr1, l, mr);
2739 rcu_read_unlock();
2742 return result;
2745 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2746 uint8_t *buf, int len, bool is_write)
2748 if (is_write) {
2749 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2750 } else {
2751 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2755 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2756 int len, int is_write)
2758 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2759 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2767 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2768 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2770 hwaddr l;
2771 uint8_t *ptr;
2772 hwaddr addr1;
2773 MemoryRegion *mr;
2775 rcu_read_lock();
2776 while (len > 0) {
2777 l = len;
2778 mr = address_space_translate(as, addr, &addr1, &l, true);
2780 if (!(memory_region_is_ram(mr) ||
2781 memory_region_is_romd(mr))) {
2782 l = memory_access_size(mr, l, addr1);
2783 } else {
2784 addr1 += memory_region_get_ram_addr(mr);
2785 /* ROM/RAM case */
2786 ptr = qemu_get_ram_ptr(addr1);
2787 switch (type) {
2788 case WRITE_DATA:
2789 memcpy(ptr, buf, l);
2790 invalidate_and_set_dirty(mr, addr1, l);
2791 break;
2792 case FLUSH_CACHE:
2793 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2794 break;
2797 len -= l;
2798 buf += l;
2799 addr += l;
2801 rcu_read_unlock();
2804 /* used for ROM loading : can write in RAM and ROM */
2805 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2806 const uint8_t *buf, int len)
2808 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2811 void cpu_flush_icache_range(hwaddr start, int len)
2814 * This function should do the same thing as an icache flush that was
2815 * triggered from within the guest. For TCG we are always cache coherent,
2816 * so there is no need to flush anything. For KVM / Xen we need to flush
2817 * the host's instruction cache at least.
2819 if (tcg_enabled()) {
2820 return;
2823 cpu_physical_memory_write_rom_internal(&address_space_memory,
2824 start, NULL, len, FLUSH_CACHE);
2827 typedef struct {
2828 MemoryRegion *mr;
2829 void *buffer;
2830 hwaddr addr;
2831 hwaddr len;
2832 bool in_use;
2833 } BounceBuffer;
2835 static BounceBuffer bounce;
2837 typedef struct MapClient {
2838 QEMUBH *bh;
2839 QLIST_ENTRY(MapClient) link;
2840 } MapClient;
2842 QemuMutex map_client_list_lock;
2843 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2844 = QLIST_HEAD_INITIALIZER(map_client_list);
2846 static void cpu_unregister_map_client_do(MapClient *client)
2848 QLIST_REMOVE(client, link);
2849 g_free(client);
2852 static void cpu_notify_map_clients_locked(void)
2854 MapClient *client;
2856 while (!QLIST_EMPTY(&map_client_list)) {
2857 client = QLIST_FIRST(&map_client_list);
2858 qemu_bh_schedule(client->bh);
2859 cpu_unregister_map_client_do(client);
2863 void cpu_register_map_client(QEMUBH *bh)
2865 MapClient *client = g_malloc(sizeof(*client));
2867 qemu_mutex_lock(&map_client_list_lock);
2868 client->bh = bh;
2869 QLIST_INSERT_HEAD(&map_client_list, client, link);
2870 if (!atomic_read(&bounce.in_use)) {
2871 cpu_notify_map_clients_locked();
2873 qemu_mutex_unlock(&map_client_list_lock);
2876 void cpu_exec_init_all(void)
2878 qemu_mutex_init(&ram_list.mutex);
2879 io_mem_init();
2880 memory_map_init();
2881 qemu_mutex_init(&map_client_list_lock);
2884 void cpu_unregister_map_client(QEMUBH *bh)
2886 MapClient *client;
2888 qemu_mutex_lock(&map_client_list_lock);
2889 QLIST_FOREACH(client, &map_client_list, link) {
2890 if (client->bh == bh) {
2891 cpu_unregister_map_client_do(client);
2892 break;
2895 qemu_mutex_unlock(&map_client_list_lock);
2898 static void cpu_notify_map_clients(void)
2900 qemu_mutex_lock(&map_client_list_lock);
2901 cpu_notify_map_clients_locked();
2902 qemu_mutex_unlock(&map_client_list_lock);
2905 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2907 MemoryRegion *mr;
2908 hwaddr l, xlat;
2910 rcu_read_lock();
2911 while (len > 0) {
2912 l = len;
2913 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2914 if (!memory_access_is_direct(mr, is_write)) {
2915 l = memory_access_size(mr, l, addr);
2916 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2917 return false;
2921 len -= l;
2922 addr += l;
2924 rcu_read_unlock();
2925 return true;
2928 /* Map a physical memory region into a host virtual address.
2929 * May map a subset of the requested range, given by and returned in *plen.
2930 * May return NULL if resources needed to perform the mapping are exhausted.
2931 * Use only for reads OR writes - not for read-modify-write operations.
2932 * Use cpu_register_map_client() to know when retrying the map operation is
2933 * likely to succeed.
2935 void *address_space_map(AddressSpace *as,
2936 hwaddr addr,
2937 hwaddr *plen,
2938 bool is_write)
2940 hwaddr len = *plen;
2941 hwaddr done = 0;
2942 hwaddr l, xlat, base;
2943 MemoryRegion *mr, *this_mr;
2944 ram_addr_t raddr;
2945 void *ptr;
2947 if (len == 0) {
2948 return NULL;
2951 l = len;
2952 rcu_read_lock();
2953 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2955 if (!memory_access_is_direct(mr, is_write)) {
2956 if (atomic_xchg(&bounce.in_use, true)) {
2957 rcu_read_unlock();
2958 return NULL;
2960 /* Avoid unbounded allocations */
2961 l = MIN(l, TARGET_PAGE_SIZE);
2962 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2963 bounce.addr = addr;
2964 bounce.len = l;
2966 memory_region_ref(mr);
2967 bounce.mr = mr;
2968 if (!is_write) {
2969 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2970 bounce.buffer, l);
2973 rcu_read_unlock();
2974 *plen = l;
2975 return bounce.buffer;
2978 base = xlat;
2979 raddr = memory_region_get_ram_addr(mr);
2981 for (;;) {
2982 len -= l;
2983 addr += l;
2984 done += l;
2985 if (len == 0) {
2986 break;
2989 l = len;
2990 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2991 if (this_mr != mr || xlat != base + done) {
2992 break;
2996 memory_region_ref(mr);
2997 *plen = done;
2998 ptr = qemu_ram_ptr_length(raddr + base, plen);
2999 rcu_read_unlock();
3001 return ptr;
3004 /* Unmaps a memory region previously mapped by address_space_map().
3005 * Will also mark the memory as dirty if is_write == 1. access_len gives
3006 * the amount of memory that was actually read or written by the caller.
3008 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3009 int is_write, hwaddr access_len)
3011 if (buffer != bounce.buffer) {
3012 MemoryRegion *mr;
3013 ram_addr_t addr1;
3015 mr = qemu_ram_addr_from_host(buffer, &addr1);
3016 assert(mr != NULL);
3017 if (is_write) {
3018 invalidate_and_set_dirty(mr, addr1, access_len);
3020 if (xen_enabled()) {
3021 xen_invalidate_map_cache_entry(buffer);
3023 memory_region_unref(mr);
3024 return;
3026 if (is_write) {
3027 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
3028 bounce.buffer, access_len);
3030 qemu_vfree(bounce.buffer);
3031 bounce.buffer = NULL;
3032 memory_region_unref(bounce.mr);
3033 atomic_mb_set(&bounce.in_use, false);
3034 cpu_notify_map_clients();
3037 void *cpu_physical_memory_map(hwaddr addr,
3038 hwaddr *plen,
3039 int is_write)
3041 return address_space_map(&address_space_memory, addr, plen, is_write);
3044 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3045 int is_write, hwaddr access_len)
3047 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3050 /* warning: addr must be aligned */
3051 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3052 MemTxAttrs attrs,
3053 MemTxResult *result,
3054 enum device_endian endian)
3056 uint8_t *ptr;
3057 uint64_t val;
3058 MemoryRegion *mr;
3059 hwaddr l = 4;
3060 hwaddr addr1;
3061 MemTxResult r;
3062 bool release_lock = false;
3064 rcu_read_lock();
3065 mr = address_space_translate(as, addr, &addr1, &l, false);
3066 if (l < 4 || !memory_access_is_direct(mr, false)) {
3067 release_lock |= prepare_mmio_access(mr);
3069 /* I/O case */
3070 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3071 #if defined(TARGET_WORDS_BIGENDIAN)
3072 if (endian == DEVICE_LITTLE_ENDIAN) {
3073 val = bswap32(val);
3075 #else
3076 if (endian == DEVICE_BIG_ENDIAN) {
3077 val = bswap32(val);
3079 #endif
3080 } else {
3081 /* RAM case */
3082 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3083 & TARGET_PAGE_MASK)
3084 + addr1);
3085 switch (endian) {
3086 case DEVICE_LITTLE_ENDIAN:
3087 val = ldl_le_p(ptr);
3088 break;
3089 case DEVICE_BIG_ENDIAN:
3090 val = ldl_be_p(ptr);
3091 break;
3092 default:
3093 val = ldl_p(ptr);
3094 break;
3096 r = MEMTX_OK;
3098 if (result) {
3099 *result = r;
3101 if (release_lock) {
3102 qemu_mutex_unlock_iothread();
3104 rcu_read_unlock();
3105 return val;
3108 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3109 MemTxAttrs attrs, MemTxResult *result)
3111 return address_space_ldl_internal(as, addr, attrs, result,
3112 DEVICE_NATIVE_ENDIAN);
3115 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3116 MemTxAttrs attrs, MemTxResult *result)
3118 return address_space_ldl_internal(as, addr, attrs, result,
3119 DEVICE_LITTLE_ENDIAN);
3122 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3123 MemTxAttrs attrs, MemTxResult *result)
3125 return address_space_ldl_internal(as, addr, attrs, result,
3126 DEVICE_BIG_ENDIAN);
3129 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3131 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3134 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3136 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3139 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3141 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3144 /* warning: addr must be aligned */
3145 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3146 MemTxAttrs attrs,
3147 MemTxResult *result,
3148 enum device_endian endian)
3150 uint8_t *ptr;
3151 uint64_t val;
3152 MemoryRegion *mr;
3153 hwaddr l = 8;
3154 hwaddr addr1;
3155 MemTxResult r;
3156 bool release_lock = false;
3158 rcu_read_lock();
3159 mr = address_space_translate(as, addr, &addr1, &l,
3160 false);
3161 if (l < 8 || !memory_access_is_direct(mr, false)) {
3162 release_lock |= prepare_mmio_access(mr);
3164 /* I/O case */
3165 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3166 #if defined(TARGET_WORDS_BIGENDIAN)
3167 if (endian == DEVICE_LITTLE_ENDIAN) {
3168 val = bswap64(val);
3170 #else
3171 if (endian == DEVICE_BIG_ENDIAN) {
3172 val = bswap64(val);
3174 #endif
3175 } else {
3176 /* RAM case */
3177 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3178 & TARGET_PAGE_MASK)
3179 + addr1);
3180 switch (endian) {
3181 case DEVICE_LITTLE_ENDIAN:
3182 val = ldq_le_p(ptr);
3183 break;
3184 case DEVICE_BIG_ENDIAN:
3185 val = ldq_be_p(ptr);
3186 break;
3187 default:
3188 val = ldq_p(ptr);
3189 break;
3191 r = MEMTX_OK;
3193 if (result) {
3194 *result = r;
3196 if (release_lock) {
3197 qemu_mutex_unlock_iothread();
3199 rcu_read_unlock();
3200 return val;
3203 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3204 MemTxAttrs attrs, MemTxResult *result)
3206 return address_space_ldq_internal(as, addr, attrs, result,
3207 DEVICE_NATIVE_ENDIAN);
3210 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3211 MemTxAttrs attrs, MemTxResult *result)
3213 return address_space_ldq_internal(as, addr, attrs, result,
3214 DEVICE_LITTLE_ENDIAN);
3217 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3218 MemTxAttrs attrs, MemTxResult *result)
3220 return address_space_ldq_internal(as, addr, attrs, result,
3221 DEVICE_BIG_ENDIAN);
3224 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3226 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3229 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3231 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3234 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3236 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3239 /* XXX: optimize */
3240 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3241 MemTxAttrs attrs, MemTxResult *result)
3243 uint8_t val;
3244 MemTxResult r;
3246 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3247 if (result) {
3248 *result = r;
3250 return val;
3253 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3255 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3258 /* warning: addr must be aligned */
3259 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3260 hwaddr addr,
3261 MemTxAttrs attrs,
3262 MemTxResult *result,
3263 enum device_endian endian)
3265 uint8_t *ptr;
3266 uint64_t val;
3267 MemoryRegion *mr;
3268 hwaddr l = 2;
3269 hwaddr addr1;
3270 MemTxResult r;
3271 bool release_lock = false;
3273 rcu_read_lock();
3274 mr = address_space_translate(as, addr, &addr1, &l,
3275 false);
3276 if (l < 2 || !memory_access_is_direct(mr, false)) {
3277 release_lock |= prepare_mmio_access(mr);
3279 /* I/O case */
3280 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3281 #if defined(TARGET_WORDS_BIGENDIAN)
3282 if (endian == DEVICE_LITTLE_ENDIAN) {
3283 val = bswap16(val);
3285 #else
3286 if (endian == DEVICE_BIG_ENDIAN) {
3287 val = bswap16(val);
3289 #endif
3290 } else {
3291 /* RAM case */
3292 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3293 & TARGET_PAGE_MASK)
3294 + addr1);
3295 switch (endian) {
3296 case DEVICE_LITTLE_ENDIAN:
3297 val = lduw_le_p(ptr);
3298 break;
3299 case DEVICE_BIG_ENDIAN:
3300 val = lduw_be_p(ptr);
3301 break;
3302 default:
3303 val = lduw_p(ptr);
3304 break;
3306 r = MEMTX_OK;
3308 if (result) {
3309 *result = r;
3311 if (release_lock) {
3312 qemu_mutex_unlock_iothread();
3314 rcu_read_unlock();
3315 return val;
3318 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3319 MemTxAttrs attrs, MemTxResult *result)
3321 return address_space_lduw_internal(as, addr, attrs, result,
3322 DEVICE_NATIVE_ENDIAN);
3325 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3326 MemTxAttrs attrs, MemTxResult *result)
3328 return address_space_lduw_internal(as, addr, attrs, result,
3329 DEVICE_LITTLE_ENDIAN);
3332 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3333 MemTxAttrs attrs, MemTxResult *result)
3335 return address_space_lduw_internal(as, addr, attrs, result,
3336 DEVICE_BIG_ENDIAN);
3339 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3341 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3344 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3346 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3349 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3351 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3354 /* warning: addr must be aligned. The ram page is not masked as dirty
3355 and the code inside is not invalidated. It is useful if the dirty
3356 bits are used to track modified PTEs */
3357 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3358 MemTxAttrs attrs, MemTxResult *result)
3360 uint8_t *ptr;
3361 MemoryRegion *mr;
3362 hwaddr l = 4;
3363 hwaddr addr1;
3364 MemTxResult r;
3365 uint8_t dirty_log_mask;
3366 bool release_lock = false;
3368 rcu_read_lock();
3369 mr = address_space_translate(as, addr, &addr1, &l,
3370 true);
3371 if (l < 4 || !memory_access_is_direct(mr, true)) {
3372 release_lock |= prepare_mmio_access(mr);
3374 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3375 } else {
3376 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3377 ptr = qemu_get_ram_ptr(addr1);
3378 stl_p(ptr, val);
3380 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3381 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3382 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3383 r = MEMTX_OK;
3385 if (result) {
3386 *result = r;
3388 if (release_lock) {
3389 qemu_mutex_unlock_iothread();
3391 rcu_read_unlock();
3394 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3396 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3399 /* warning: addr must be aligned */
3400 static inline void address_space_stl_internal(AddressSpace *as,
3401 hwaddr addr, uint32_t val,
3402 MemTxAttrs attrs,
3403 MemTxResult *result,
3404 enum device_endian endian)
3406 uint8_t *ptr;
3407 MemoryRegion *mr;
3408 hwaddr l = 4;
3409 hwaddr addr1;
3410 MemTxResult r;
3411 bool release_lock = false;
3413 rcu_read_lock();
3414 mr = address_space_translate(as, addr, &addr1, &l,
3415 true);
3416 if (l < 4 || !memory_access_is_direct(mr, true)) {
3417 release_lock |= prepare_mmio_access(mr);
3419 #if defined(TARGET_WORDS_BIGENDIAN)
3420 if (endian == DEVICE_LITTLE_ENDIAN) {
3421 val = bswap32(val);
3423 #else
3424 if (endian == DEVICE_BIG_ENDIAN) {
3425 val = bswap32(val);
3427 #endif
3428 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3429 } else {
3430 /* RAM case */
3431 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3432 ptr = qemu_get_ram_ptr(addr1);
3433 switch (endian) {
3434 case DEVICE_LITTLE_ENDIAN:
3435 stl_le_p(ptr, val);
3436 break;
3437 case DEVICE_BIG_ENDIAN:
3438 stl_be_p(ptr, val);
3439 break;
3440 default:
3441 stl_p(ptr, val);
3442 break;
3444 invalidate_and_set_dirty(mr, addr1, 4);
3445 r = MEMTX_OK;
3447 if (result) {
3448 *result = r;
3450 if (release_lock) {
3451 qemu_mutex_unlock_iothread();
3453 rcu_read_unlock();
3456 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3457 MemTxAttrs attrs, MemTxResult *result)
3459 address_space_stl_internal(as, addr, val, attrs, result,
3460 DEVICE_NATIVE_ENDIAN);
3463 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3464 MemTxAttrs attrs, MemTxResult *result)
3466 address_space_stl_internal(as, addr, val, attrs, result,
3467 DEVICE_LITTLE_ENDIAN);
3470 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3471 MemTxAttrs attrs, MemTxResult *result)
3473 address_space_stl_internal(as, addr, val, attrs, result,
3474 DEVICE_BIG_ENDIAN);
3477 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3479 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3482 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3484 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3487 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3489 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3492 /* XXX: optimize */
3493 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3494 MemTxAttrs attrs, MemTxResult *result)
3496 uint8_t v = val;
3497 MemTxResult r;
3499 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3500 if (result) {
3501 *result = r;
3505 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3507 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3510 /* warning: addr must be aligned */
3511 static inline void address_space_stw_internal(AddressSpace *as,
3512 hwaddr addr, uint32_t val,
3513 MemTxAttrs attrs,
3514 MemTxResult *result,
3515 enum device_endian endian)
3517 uint8_t *ptr;
3518 MemoryRegion *mr;
3519 hwaddr l = 2;
3520 hwaddr addr1;
3521 MemTxResult r;
3522 bool release_lock = false;
3524 rcu_read_lock();
3525 mr = address_space_translate(as, addr, &addr1, &l, true);
3526 if (l < 2 || !memory_access_is_direct(mr, true)) {
3527 release_lock |= prepare_mmio_access(mr);
3529 #if defined(TARGET_WORDS_BIGENDIAN)
3530 if (endian == DEVICE_LITTLE_ENDIAN) {
3531 val = bswap16(val);
3533 #else
3534 if (endian == DEVICE_BIG_ENDIAN) {
3535 val = bswap16(val);
3537 #endif
3538 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3539 } else {
3540 /* RAM case */
3541 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3542 ptr = qemu_get_ram_ptr(addr1);
3543 switch (endian) {
3544 case DEVICE_LITTLE_ENDIAN:
3545 stw_le_p(ptr, val);
3546 break;
3547 case DEVICE_BIG_ENDIAN:
3548 stw_be_p(ptr, val);
3549 break;
3550 default:
3551 stw_p(ptr, val);
3552 break;
3554 invalidate_and_set_dirty(mr, addr1, 2);
3555 r = MEMTX_OK;
3557 if (result) {
3558 *result = r;
3560 if (release_lock) {
3561 qemu_mutex_unlock_iothread();
3563 rcu_read_unlock();
3566 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3567 MemTxAttrs attrs, MemTxResult *result)
3569 address_space_stw_internal(as, addr, val, attrs, result,
3570 DEVICE_NATIVE_ENDIAN);
3573 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3574 MemTxAttrs attrs, MemTxResult *result)
3576 address_space_stw_internal(as, addr, val, attrs, result,
3577 DEVICE_LITTLE_ENDIAN);
3580 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3581 MemTxAttrs attrs, MemTxResult *result)
3583 address_space_stw_internal(as, addr, val, attrs, result,
3584 DEVICE_BIG_ENDIAN);
3587 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3589 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3592 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3594 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3597 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3599 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3602 /* XXX: optimize */
3603 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3604 MemTxAttrs attrs, MemTxResult *result)
3606 MemTxResult r;
3607 val = tswap64(val);
3608 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3609 if (result) {
3610 *result = r;
3614 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3615 MemTxAttrs attrs, MemTxResult *result)
3617 MemTxResult r;
3618 val = cpu_to_le64(val);
3619 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3620 if (result) {
3621 *result = r;
3624 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3625 MemTxAttrs attrs, MemTxResult *result)
3627 MemTxResult r;
3628 val = cpu_to_be64(val);
3629 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3630 if (result) {
3631 *result = r;
3635 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3637 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3640 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3642 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3645 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3647 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3650 /* virtual memory access for debug (includes writing to ROM) */
3651 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3652 uint8_t *buf, int len, int is_write)
3654 int l;
3655 hwaddr phys_addr;
3656 target_ulong page;
3658 while (len > 0) {
3659 int asidx;
3660 MemTxAttrs attrs;
3662 page = addr & TARGET_PAGE_MASK;
3663 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3664 asidx = cpu_asidx_from_attrs(cpu, attrs);
3665 /* if no physical page mapped, return an error */
3666 if (phys_addr == -1)
3667 return -1;
3668 l = (page + TARGET_PAGE_SIZE) - addr;
3669 if (l > len)
3670 l = len;
3671 phys_addr += (addr & ~TARGET_PAGE_MASK);
3672 if (is_write) {
3673 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3674 phys_addr, buf, l);
3675 } else {
3676 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3677 MEMTXATTRS_UNSPECIFIED,
3678 buf, l, 0);
3680 len -= l;
3681 buf += l;
3682 addr += l;
3684 return 0;
3688 * Allows code that needs to deal with migration bitmaps etc to still be built
3689 * target independent.
3691 size_t qemu_target_page_bits(void)
3693 return TARGET_PAGE_BITS;
3696 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true exactly when TARGET_WORDS_BIGENDIAN is defined at build time.
 */
bool target_words_bigendian(void);

bool target_words_bigendian(void)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3712 #ifndef CONFIG_USER_ONLY
3713 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3715 MemoryRegion*mr;
3716 hwaddr l = 1;
3717 bool res;
3719 rcu_read_lock();
3720 mr = address_space_translate(&address_space_memory,
3721 phys_addr, &phys_addr, &l, false);
3723 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3724 rcu_read_unlock();
3725 return res;
3728 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3730 RAMBlock *block;
3731 int ret = 0;
3733 rcu_read_lock();
3734 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3735 ret = func(block->idstr, block->host, block->offset,
3736 block->used_length, opaque);
3737 if (ret) {
3738 break;
3741 rcu_read_unlock();
3742 return ret;
3744 #endif