pc: apic: introduce APIC macro
[qemu/kevin.git] / exec.c
blobc81d5ab981e62d9351b4be7bd5b513dd888dbc9f
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE).
     * Zero means this entry is a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Non-leaf (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
121 #define P_L2_BITS 9
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 MemoryRegionSection *mru_section;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
147 PhysPageMap map;
148 AddressSpace *as;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
153 MemoryRegion iomem;
154 AddressSpace *as;
155 hwaddr base;
156 uint16_t sub_section[TARGET_PAGE_SIZE];
157 } subpage_t;
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
178 CPUState *cpu;
179 AddressSpace *as;
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
184 #endif
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 static unsigned alloc_hint = 16;
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 alloc_hint = map->nodes_nb_alloc;
199 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
201 unsigned i;
202 uint32_t ret;
203 PhysPageEntry e;
204 PhysPageEntry *p;
206 ret = map->nodes_nb++;
207 p = map->nodes[ret];
208 assert(ret != PHYS_MAP_NODE_NIL);
209 assert(ret != map->nodes_nb_alloc);
211 e.skip = leaf ? 0 : 1;
212 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
213 for (i = 0; i < P_L2_SIZE; ++i) {
214 memcpy(&p[i], &e, sizeof(e));
216 return ret;
219 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
220 hwaddr *index, hwaddr *nb, uint16_t leaf,
221 int level)
223 PhysPageEntry *p;
224 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
226 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
227 lp->ptr = phys_map_node_alloc(map, level == 0);
229 p = map->nodes[lp->ptr];
230 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
232 while (*nb && lp < &p[P_L2_SIZE]) {
233 if ((*index & (step - 1)) == 0 && *nb >= step) {
234 lp->skip = 0;
235 lp->ptr = leaf;
236 *index += step;
237 *nb -= step;
238 } else {
239 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
241 ++lp;
245 static void phys_page_set(AddressSpaceDispatch *d,
246 hwaddr index, hwaddr nb,
247 uint16_t leaf)
249 /* Wildly overreserve - it doesn't matter much. */
250 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
252 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
255 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
256 * and update our entry so we can skip it and go directly to the destination.
258 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
260 unsigned valid_ptr = P_L2_SIZE;
261 int valid = 0;
262 PhysPageEntry *p;
263 int i;
265 if (lp->ptr == PHYS_MAP_NODE_NIL) {
266 return;
269 p = nodes[lp->ptr];
270 for (i = 0; i < P_L2_SIZE; i++) {
271 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
272 continue;
275 valid_ptr = i;
276 valid++;
277 if (p[i].skip) {
278 phys_page_compact(&p[i], nodes, compacted);
282 /* We can only compress if there's only one child. */
283 if (valid != 1) {
284 return;
287 assert(valid_ptr < P_L2_SIZE);
289 /* Don't compress if it won't fit in the # of bits we have. */
290 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
291 return;
294 lp->ptr = p[valid_ptr].ptr;
295 if (!p[valid_ptr].skip) {
296 /* If our only child is a leaf, make this a leaf. */
297 /* By design, we should have made this node a leaf to begin with so we
298 * should never reach here.
299 * But since it's so simple to handle this, let's do it just in case we
300 * change this rule.
302 lp->skip = 0;
303 } else {
304 lp->skip += p[valid_ptr].skip;
308 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
310 DECLARE_BITMAP(compacted, nodes_nb);
312 if (d->phys_map.skip) {
313 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
317 static inline bool section_covers_addr(const MemoryRegionSection *section,
318 hwaddr addr)
320 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
321 * the section must cover the entire address space.
323 return section->size.hi ||
324 range_covers_byte(section->offset_within_address_space,
325 section->size.lo, addr);
328 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
329 Node *nodes, MemoryRegionSection *sections)
331 PhysPageEntry *p;
332 hwaddr index = addr >> TARGET_PAGE_BITS;
333 int i;
335 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
336 if (lp.ptr == PHYS_MAP_NODE_NIL) {
337 return &sections[PHYS_SECTION_UNASSIGNED];
339 p = nodes[lp.ptr];
340 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
343 if (section_covers_addr(&sections[lp.ptr], addr)) {
344 return &sections[lp.ptr];
345 } else {
346 return &sections[PHYS_SECTION_UNASSIGNED];
350 bool memory_region_is_unassigned(MemoryRegion *mr)
352 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
353 && mr != &io_mem_watch;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
358 hwaddr addr,
359 bool resolve_subpage)
361 MemoryRegionSection *section = atomic_read(&d->mru_section);
362 subpage_t *subpage;
363 bool update;
365 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
366 section_covers_addr(section, addr)) {
367 update = false;
368 } else {
369 section = phys_page_find(d->phys_map, addr, d->map.nodes,
370 d->map.sections);
371 update = true;
373 if (resolve_subpage && section->mr->subpage) {
374 subpage = container_of(section->mr, subpage_t, iomem);
375 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
377 if (update) {
378 atomic_set(&d->mru_section, section);
380 return section;
383 /* Called from RCU critical section */
384 static MemoryRegionSection *
385 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
386 hwaddr *plen, bool resolve_subpage)
388 MemoryRegionSection *section;
389 MemoryRegion *mr;
390 Int128 diff;
392 section = address_space_lookup_region(d, addr, resolve_subpage);
393 /* Compute offset within MemoryRegionSection */
394 addr -= section->offset_within_address_space;
396 /* Compute offset within MemoryRegion */
397 *xlat = addr + section->offset_within_region;
399 mr = section->mr;
401 /* MMIO registers can be expected to perform full-width accesses based only
402 * on their address, without considering adjacent registers that could
403 * decode to completely different MemoryRegions. When such registers
404 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
405 * regions overlap wildly. For this reason we cannot clamp the accesses
406 * here.
408 * If the length is small (as is the case for address_space_ldl/stl),
409 * everything works fine. If the incoming length is large, however,
410 * the caller really has to do the clamping through memory_access_size.
412 if (memory_region_is_ram(mr)) {
413 diff = int128_sub(section->size, int128_make64(addr));
414 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
416 return section;
419 /* Called from RCU critical section */
420 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen,
422 bool is_write)
424 IOMMUTLBEntry iotlb;
425 MemoryRegionSection *section;
426 MemoryRegion *mr;
428 for (;;) {
429 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
430 section = address_space_translate_internal(d, addr, &addr, plen, true);
431 mr = section->mr;
433 if (!mr->iommu_ops) {
434 break;
437 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
438 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
439 | (addr & iotlb.addr_mask));
440 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
441 if (!(iotlb.perm & (1 << is_write))) {
442 mr = &io_mem_unassigned;
443 break;
446 as = iotlb.target_as;
449 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
450 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
451 *plen = MIN(page, *plen);
454 *xlat = addr;
455 return mr;
458 /* Called from RCU critical section */
459 MemoryRegionSection *
460 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
461 hwaddr *xlat, hwaddr *plen)
463 MemoryRegionSection *section;
464 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
466 section = address_space_translate_internal(d, addr, xlat, plen, false);
468 assert(!section->mr->iommu_ops);
469 return section;
471 #endif
473 #if !defined(CONFIG_USER_ONLY)
475 static int cpu_common_post_load(void *opaque, int version_id)
477 CPUState *cpu = opaque;
479 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
480 version_id is increased. */
481 cpu->interrupt_request &= ~0x01;
482 tlb_flush(cpu, 1);
484 return 0;
487 static int cpu_common_pre_load(void *opaque)
489 CPUState *cpu = opaque;
491 cpu->exception_index = -1;
493 return 0;
496 static bool cpu_common_exception_index_needed(void *opaque)
498 CPUState *cpu = opaque;
500 return tcg_enabled() && cpu->exception_index != -1;
503 static const VMStateDescription vmstate_cpu_common_exception_index = {
504 .name = "cpu_common/exception_index",
505 .version_id = 1,
506 .minimum_version_id = 1,
507 .needed = cpu_common_exception_index_needed,
508 .fields = (VMStateField[]) {
509 VMSTATE_INT32(exception_index, CPUState),
510 VMSTATE_END_OF_LIST()
514 static bool cpu_common_crash_occurred_needed(void *opaque)
516 CPUState *cpu = opaque;
518 return cpu->crash_occurred;
521 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
522 .name = "cpu_common/crash_occurred",
523 .version_id = 1,
524 .minimum_version_id = 1,
525 .needed = cpu_common_crash_occurred_needed,
526 .fields = (VMStateField[]) {
527 VMSTATE_BOOL(crash_occurred, CPUState),
528 VMSTATE_END_OF_LIST()
532 const VMStateDescription vmstate_cpu_common = {
533 .name = "cpu_common",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .pre_load = cpu_common_pre_load,
537 .post_load = cpu_common_post_load,
538 .fields = (VMStateField[]) {
539 VMSTATE_UINT32(halted, CPUState),
540 VMSTATE_UINT32(interrupt_request, CPUState),
541 VMSTATE_END_OF_LIST()
543 .subsections = (const VMStateDescription*[]) {
544 &vmstate_cpu_common_exception_index,
545 &vmstate_cpu_common_crash_occurred,
546 NULL
550 #endif
552 CPUState *qemu_get_cpu(int index)
554 CPUState *cpu;
556 CPU_FOREACH(cpu) {
557 if (cpu->cpu_index == index) {
558 return cpu;
562 return NULL;
565 #if !defined(CONFIG_USER_ONLY)
566 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
568 CPUAddressSpace *newas;
570 /* Target code should have set num_ases before calling us */
571 assert(asidx < cpu->num_ases);
573 if (asidx == 0) {
574 /* address space 0 gets the convenience alias */
575 cpu->as = as;
578 /* KVM cannot currently support multiple address spaces. */
579 assert(asidx == 0 || !kvm_enabled());
581 if (!cpu->cpu_ases) {
582 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
585 newas = &cpu->cpu_ases[asidx];
586 newas->cpu = cpu;
587 newas->as = as;
588 if (tcg_enabled()) {
589 newas->tcg_as_listener.commit = tcg_commit;
590 memory_listener_register(&newas->tcg_as_listener, as);
594 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
596 /* Return the AddressSpace corresponding to the specified index */
597 return cpu->cpu_ases[asidx].as;
599 #endif
601 static bool cpu_index_auto_assigned;
603 static int cpu_get_free_index(void)
605 CPUState *some_cpu;
606 int cpu_index = 0;
608 cpu_index_auto_assigned = true;
609 CPU_FOREACH(some_cpu) {
610 cpu_index++;
612 return cpu_index;
615 void cpu_exec_exit(CPUState *cpu)
617 CPUClass *cc = CPU_GET_CLASS(cpu);
619 cpu_list_lock();
620 if (!QTAILQ_IN_USE(cpu, node)) {
621 /* there is nothing to undo since cpu_exec_init() hasn't been called */
622 cpu_list_unlock();
623 return;
626 assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ)));
628 QTAILQ_REMOVE(&cpus, cpu, node);
629 cpu->cpu_index = UNASSIGNED_CPU_INDEX;
630 cpu_list_unlock();
632 if (cc->vmsd != NULL) {
633 vmstate_unregister(NULL, cc->vmsd, cpu);
635 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
636 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
640 void cpu_exec_init(CPUState *cpu, Error **errp)
642 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
643 Error *local_err ATTRIBUTE_UNUSED = NULL;
645 cpu->as = NULL;
646 cpu->num_ases = 0;
648 #ifndef CONFIG_USER_ONLY
649 cpu->thread_id = qemu_get_thread_id();
651 /* This is a softmmu CPU object, so create a property for it
652 * so users can wire up its memory. (This can't go in qom/cpu.c
653 * because that file is compiled only once for both user-mode
654 * and system builds.) The default if no link is set up is to use
655 * the system address space.
657 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
658 (Object **)&cpu->memory,
659 qdev_prop_allow_set_link_before_realize,
660 OBJ_PROP_LINK_UNREF_ON_RELEASE,
661 &error_abort);
662 cpu->memory = system_memory;
663 object_ref(OBJECT(cpu->memory));
664 #endif
666 cpu_list_lock();
667 if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
668 cpu->cpu_index = cpu_get_free_index();
669 assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
670 } else {
671 assert(!cpu_index_auto_assigned);
673 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
674 cpu_list_unlock();
676 #ifndef CONFIG_USER_ONLY
677 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
678 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
680 if (cc->vmsd != NULL) {
681 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
683 #endif
686 #if defined(CONFIG_USER_ONLY)
687 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
689 tb_invalidate_phys_page_range(pc, pc + 1, 0);
691 #else
692 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
694 MemTxAttrs attrs;
695 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
696 int asidx = cpu_asidx_from_attrs(cpu, attrs);
697 if (phys != -1) {
698 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
699 phys | (pc & ~TARGET_PAGE_MASK));
702 #endif
704 #if defined(CONFIG_USER_ONLY)
705 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
710 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
711 int flags)
713 return -ENOSYS;
716 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
720 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
721 int flags, CPUWatchpoint **watchpoint)
723 return -ENOSYS;
725 #else
726 /* Add a watchpoint. */
727 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
728 int flags, CPUWatchpoint **watchpoint)
730 CPUWatchpoint *wp;
732 /* forbid ranges which are empty or run off the end of the address space */
733 if (len == 0 || (addr + len - 1) < addr) {
734 error_report("tried to set invalid watchpoint at %"
735 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
736 return -EINVAL;
738 wp = g_malloc(sizeof(*wp));
740 wp->vaddr = addr;
741 wp->len = len;
742 wp->flags = flags;
744 /* keep all GDB-injected watchpoints in front */
745 if (flags & BP_GDB) {
746 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
747 } else {
748 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
751 tlb_flush_page(cpu, addr);
753 if (watchpoint)
754 *watchpoint = wp;
755 return 0;
758 /* Remove a specific watchpoint. */
759 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
760 int flags)
762 CPUWatchpoint *wp;
764 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
765 if (addr == wp->vaddr && len == wp->len
766 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
767 cpu_watchpoint_remove_by_ref(cpu, wp);
768 return 0;
771 return -ENOENT;
774 /* Remove a specific watchpoint by reference. */
775 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
777 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
779 tlb_flush_page(cpu, watchpoint->vaddr);
781 g_free(watchpoint);
784 /* Remove all matching watchpoints. */
785 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
787 CPUWatchpoint *wp, *next;
789 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
790 if (wp->flags & mask) {
791 cpu_watchpoint_remove_by_ref(cpu, wp);
796 /* Return true if this watchpoint address matches the specified
797 * access (ie the address range covered by the watchpoint overlaps
798 * partially or completely with the address range covered by the
799 * access).
801 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
802 vaddr addr,
803 vaddr len)
805 /* We know the lengths are non-zero, but a little caution is
806 * required to avoid errors in the case where the range ends
807 * exactly at the top of the address space and so addr + len
808 * wraps round to zero.
810 vaddr wpend = wp->vaddr + wp->len - 1;
811 vaddr addrend = addr + len - 1;
813 return !(addr > wpend || wp->vaddr > addrend);
816 #endif
818 /* Add a breakpoint. */
819 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
820 CPUBreakpoint **breakpoint)
822 CPUBreakpoint *bp;
824 bp = g_malloc(sizeof(*bp));
826 bp->pc = pc;
827 bp->flags = flags;
829 /* keep all GDB-injected breakpoints in front */
830 if (flags & BP_GDB) {
831 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
832 } else {
833 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
836 breakpoint_invalidate(cpu, pc);
838 if (breakpoint) {
839 *breakpoint = bp;
841 return 0;
844 /* Remove a specific breakpoint. */
845 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
847 CPUBreakpoint *bp;
849 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
850 if (bp->pc == pc && bp->flags == flags) {
851 cpu_breakpoint_remove_by_ref(cpu, bp);
852 return 0;
855 return -ENOENT;
858 /* Remove a specific breakpoint by reference. */
859 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
861 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
863 breakpoint_invalidate(cpu, breakpoint->pc);
865 g_free(breakpoint);
868 /* Remove all matching breakpoints. */
869 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
871 CPUBreakpoint *bp, *next;
873 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
874 if (bp->flags & mask) {
875 cpu_breakpoint_remove_by_ref(cpu, bp);
880 /* enable or disable single step mode. EXCP_DEBUG is returned by the
881 CPU loop after each instruction */
882 void cpu_single_step(CPUState *cpu, int enabled)
884 if (cpu->singlestep_enabled != enabled) {
885 cpu->singlestep_enabled = enabled;
886 if (kvm_enabled()) {
887 kvm_update_guest_debug(cpu, 0);
888 } else {
889 /* must flush all the translated code to avoid inconsistencies */
890 /* XXX: only flush what is necessary */
891 tb_flush(cpu);
896 void cpu_abort(CPUState *cpu, const char *fmt, ...)
898 va_list ap;
899 va_list ap2;
901 va_start(ap, fmt);
902 va_copy(ap2, ap);
903 fprintf(stderr, "qemu: fatal: ");
904 vfprintf(stderr, fmt, ap);
905 fprintf(stderr, "\n");
906 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
907 if (qemu_log_separate()) {
908 qemu_log("qemu: fatal: ");
909 qemu_log_vprintf(fmt, ap2);
910 qemu_log("\n");
911 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
912 qemu_log_flush();
913 qemu_log_close();
915 va_end(ap2);
916 va_end(ap);
917 replay_finish();
918 #if defined(CONFIG_USER_ONLY)
920 struct sigaction act;
921 sigfillset(&act.sa_mask);
922 act.sa_handler = SIG_DFL;
923 sigaction(SIGABRT, &act, NULL);
925 #endif
926 abort();
929 #if !defined(CONFIG_USER_ONLY)
930 /* Called from RCU critical section */
931 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
933 RAMBlock *block;
935 block = atomic_rcu_read(&ram_list.mru_block);
936 if (block && addr - block->offset < block->max_length) {
937 return block;
939 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
940 if (addr - block->offset < block->max_length) {
941 goto found;
945 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
946 abort();
948 found:
949 /* It is safe to write mru_block outside the iothread lock. This
950 * is what happens:
952 * mru_block = xxx
953 * rcu_read_unlock()
954 * xxx removed from list
955 * rcu_read_lock()
956 * read mru_block
957 * mru_block = NULL;
958 * call_rcu(reclaim_ramblock, xxx);
959 * rcu_read_unlock()
961 * atomic_rcu_set is not needed here. The block was already published
962 * when it was placed into the list. Here we're just making an extra
963 * copy of the pointer.
965 ram_list.mru_block = block;
966 return block;
969 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
971 CPUState *cpu;
972 ram_addr_t start1;
973 RAMBlock *block;
974 ram_addr_t end;
976 end = TARGET_PAGE_ALIGN(start + length);
977 start &= TARGET_PAGE_MASK;
979 rcu_read_lock();
980 block = qemu_get_ram_block(start);
981 assert(block == qemu_get_ram_block(end - 1));
982 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
983 CPU_FOREACH(cpu) {
984 tlb_reset_dirty(cpu, start1, length);
986 rcu_read_unlock();
989 /* Note: start and end must be within the same ram block. */
990 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
991 ram_addr_t length,
992 unsigned client)
994 DirtyMemoryBlocks *blocks;
995 unsigned long end, page;
996 bool dirty = false;
998 if (length == 0) {
999 return false;
1002 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1003 page = start >> TARGET_PAGE_BITS;
1005 rcu_read_lock();
1007 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1009 while (page < end) {
1010 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1011 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1012 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1014 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1015 offset, num);
1016 page += num;
1019 rcu_read_unlock();
1021 if (dirty && tcg_enabled()) {
1022 tlb_reset_dirty_range_all(start, length);
1025 return dirty;
1028 /* Called from RCU critical section */
1029 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1030 MemoryRegionSection *section,
1031 target_ulong vaddr,
1032 hwaddr paddr, hwaddr xlat,
1033 int prot,
1034 target_ulong *address)
1036 hwaddr iotlb;
1037 CPUWatchpoint *wp;
1039 if (memory_region_is_ram(section->mr)) {
1040 /* Normal RAM. */
1041 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1042 if (!section->readonly) {
1043 iotlb |= PHYS_SECTION_NOTDIRTY;
1044 } else {
1045 iotlb |= PHYS_SECTION_ROM;
1047 } else {
1048 AddressSpaceDispatch *d;
1050 d = atomic_rcu_read(&section->address_space->dispatch);
1051 iotlb = section - d->map.sections;
1052 iotlb += xlat;
1055 /* Make accesses to pages with watchpoints go via the
1056 watchpoint trap routines. */
1057 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1058 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1059 /* Avoid trapping reads of pages with a write breakpoint. */
1060 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1061 iotlb = PHYS_SECTION_WATCH + paddr;
1062 *address |= TLB_MMIO;
1063 break;
1068 return iotlb;
1070 #endif /* !defined(CONFIG_USER_ONLY) */
1072 #if !defined(CONFIG_USER_ONLY)
1074 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1075 uint16_t section);
1076 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1078 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1079 qemu_anon_ram_alloc;
1082 * Set a custom physical guest memory alloator.
1083 * Accelerators with unusual needs may need this. Hopefully, we can
1084 * get rid of it eventually.
1086 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1088 phys_mem_alloc = alloc;
1091 static uint16_t phys_section_add(PhysPageMap *map,
1092 MemoryRegionSection *section)
1094 /* The physical section number is ORed with a page-aligned
1095 * pointer to produce the iotlb entries. Thus it should
1096 * never overflow into the page-aligned value.
1098 assert(map->sections_nb < TARGET_PAGE_SIZE);
1100 if (map->sections_nb == map->sections_nb_alloc) {
1101 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1102 map->sections = g_renew(MemoryRegionSection, map->sections,
1103 map->sections_nb_alloc);
1105 map->sections[map->sections_nb] = *section;
1106 memory_region_ref(section->mr);
1107 return map->sections_nb++;
1110 static void phys_section_destroy(MemoryRegion *mr)
1112 bool have_sub_page = mr->subpage;
1114 memory_region_unref(mr);
1116 if (have_sub_page) {
1117 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1118 object_unref(OBJECT(&subpage->iomem));
1119 g_free(subpage);
1123 static void phys_sections_free(PhysPageMap *map)
1125 while (map->sections_nb > 0) {
1126 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1127 phys_section_destroy(section->mr);
1129 g_free(map->sections);
1130 g_free(map->nodes);
1133 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1135 subpage_t *subpage;
1136 hwaddr base = section->offset_within_address_space
1137 & TARGET_PAGE_MASK;
1138 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1139 d->map.nodes, d->map.sections);
1140 MemoryRegionSection subsection = {
1141 .offset_within_address_space = base,
1142 .size = int128_make64(TARGET_PAGE_SIZE),
1144 hwaddr start, end;
1146 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1148 if (!(existing->mr->subpage)) {
1149 subpage = subpage_init(d->as, base);
1150 subsection.address_space = d->as;
1151 subsection.mr = &subpage->iomem;
1152 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1153 phys_section_add(&d->map, &subsection));
1154 } else {
1155 subpage = container_of(existing->mr, subpage_t, iomem);
1157 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1158 end = start + int128_get64(section->size) - 1;
1159 subpage_register(subpage, start, end,
1160 phys_section_add(&d->map, section));
1164 static void register_multipage(AddressSpaceDispatch *d,
1165 MemoryRegionSection *section)
1167 hwaddr start_addr = section->offset_within_address_space;
1168 uint16_t section_index = phys_section_add(&d->map, section);
1169 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1170 TARGET_PAGE_BITS));
1172 assert(num_pages);
1173 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
/*
 * Listener callback (installed as both region_add and region_nop by
 * address_space_init_dispatch): enter @section into the address space's
 * next dispatch table.
 *
 * The section is cut into pieces that are handed to register_subpage() or
 * register_multipage():
 *  - an unaligned head, up to the next page boundary, goes to
 *    register_subpage();
 *  - in the loop, a remainder shorter than one page goes to
 *    register_subpage(); a remainder that does not start on a page boundary
 *    contributes one page-sized chunk to register_subpage(); otherwise the
 *    largest whole-page prefix goes to register_multipage().
 */
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    /* 'now' is the piece being registered, 'remain' what is still pending */
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        /* Bytes from the section start up to the next page boundary */
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        /* Nothing consumed yet; the loop below starts at the section start */
        now.size = int128_zero();
    }
    /* Loop until the last registered piece was exactly what remained */
    while (int128_ne(remain.size, now.size)) {
        /* Advance 'remain' past the piece registered last */
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            /* Round the size down to a multiple of the page size */
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
/* Flush the KVM coalesced MMIO buffer; does nothing unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
/* Acquire ram_list.mutex. */
void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}
/* Release ram_list.mutex. */
void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}
1225 #ifdef __linux__
1226 static void *file_ram_alloc(RAMBlock *block,
1227 ram_addr_t memory,
1228 const char *path,
1229 Error **errp)
1231 bool unlink_on_error = false;
1232 char *filename;
1233 char *sanitized_name;
1234 char *c;
1235 void *area = MAP_FAILED;
1236 int fd = -1;
1237 int64_t page_size;
1239 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1240 error_setg(errp,
1241 "host lacks kvm mmu notifiers, -mem-path unsupported");
1242 return NULL;
1245 for (;;) {
1246 fd = open(path, O_RDWR);
1247 if (fd >= 0) {
1248 /* @path names an existing file, use it */
1249 break;
1251 if (errno == ENOENT) {
1252 /* @path names a file that doesn't exist, create it */
1253 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1254 if (fd >= 0) {
1255 unlink_on_error = true;
1256 break;
1258 } else if (errno == EISDIR) {
1259 /* @path names a directory, create a file there */
1260 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1261 sanitized_name = g_strdup(memory_region_name(block->mr));
1262 for (c = sanitized_name; *c != '\0'; c++) {
1263 if (*c == '/') {
1264 *c = '_';
1268 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1269 sanitized_name);
1270 g_free(sanitized_name);
1272 fd = mkstemp(filename);
1273 if (fd >= 0) {
1274 unlink(filename);
1275 g_free(filename);
1276 break;
1278 g_free(filename);
1280 if (errno != EEXIST && errno != EINTR) {
1281 error_setg_errno(errp, errno,
1282 "can't open backing store %s for guest RAM",
1283 path);
1284 goto error;
1287 * Try again on EINTR and EEXIST. The latter happens when
1288 * something else creates the file between our two open().
1292 page_size = qemu_fd_getpagesize(fd);
1293 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1295 if (memory < page_size) {
1296 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1297 "or larger than page size 0x%" PRIx64,
1298 memory, page_size);
1299 goto error;
1302 memory = ROUND_UP(memory, page_size);
1305 * ftruncate is not supported by hugetlbfs in older
1306 * hosts, so don't bother bailing out on errors.
1307 * If anything goes wrong with it under other filesystems,
1308 * mmap will fail.
1310 if (ftruncate(fd, memory)) {
1311 perror("ftruncate");
1314 area = qemu_ram_mmap(fd, memory, block->mr->align,
1315 block->flags & RAM_SHARED);
1316 if (area == MAP_FAILED) {
1317 error_setg_errno(errp, errno,
1318 "unable to map backing store for guest RAM");
1319 goto error;
1322 if (mem_prealloc) {
1323 os_mem_prealloc(fd, area, memory, errp);
1324 if (errp && *errp) {
1325 goto error;
1329 block->fd = fd;
1330 return area;
1332 error:
1333 if (area != MAP_FAILED) {
1334 qemu_ram_munmap(area, memory);
1336 if (unlink_on_error) {
1337 unlink(path);
1339 if (fd != -1) {
1340 close(fd);
1342 return NULL;
1344 #endif
1346 /* Called with the ramlist lock held. */
1347 static ram_addr_t find_ram_offset(ram_addr_t size)
1349 RAMBlock *block, *next_block;
1350 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1352 assert(size != 0); /* it would hand out same offset multiple times */
1354 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1355 return 0;
1358 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1359 ram_addr_t end, next = RAM_ADDR_MAX;
1361 end = block->offset + block->max_length;
1363 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1364 if (next_block->offset >= end) {
1365 next = MIN(next, next_block->offset);
1368 if (next - end >= size && next - end < mingap) {
1369 offset = end;
1370 mingap = next - end;
1374 if (offset == RAM_ADDR_MAX) {
1375 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1376 (uint64_t)size);
1377 abort();
1380 return offset;
1383 ram_addr_t last_ram_offset(void)
1385 RAMBlock *block;
1386 ram_addr_t last = 0;
1388 rcu_read_lock();
1389 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1390 last = MAX(last, block->offset + block->max_length);
1392 rcu_read_unlock();
1393 return last;
1396 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1398 int ret;
1400 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1401 if (!machine_dump_guest_core(current_machine)) {
1402 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1403 if (ret) {
1404 perror("qemu_madvise");
1405 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1406 "but dump_guest_core=off specified\n");
/* Return the id string stored in @rb (points into the block itself). */
const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}
1416 /* Called with iothread lock held. */
1417 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1419 RAMBlock *block;
1421 assert(new_block);
1422 assert(!new_block->idstr[0]);
1424 if (dev) {
1425 char *id = qdev_get_dev_path(dev);
1426 if (id) {
1427 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1428 g_free(id);
1431 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1433 rcu_read_lock();
1434 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1435 if (block != new_block &&
1436 !strcmp(block->idstr, new_block->idstr)) {
1437 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1438 new_block->idstr);
1439 abort();
1442 rcu_read_unlock();
1445 /* Called with iothread lock held. */
1446 void qemu_ram_unset_idstr(RAMBlock *block)
1448 /* FIXME: arch_init.c assumes that this is not called throughout
1449 * migration. Ignore the problem since hot-unplug during migration
1450 * does not work anyway.
1452 if (block) {
1453 memset(block->idstr, 0, sizeof(block->idstr));
1457 static int memory_try_enable_merging(void *addr, size_t len)
1459 if (!machine_mem_merge(current_machine)) {
1460 /* disabled by the user */
1461 return 0;
1464 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1467 /* Only legal before guest might have detected the memory size: e.g. on
1468 * incoming migration, or right after reset.
1470 * As memory core doesn't know how is memory accessed, it is up to
1471 * resize callback to update device state and/or add assertions to detect
1472 * misuse, if necessary.
1474 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1476 assert(block);
1478 newsize = HOST_PAGE_ALIGN(newsize);
1480 if (block->used_length == newsize) {
1481 return 0;
1484 if (!(block->flags & RAM_RESIZEABLE)) {
1485 error_setg_errno(errp, EINVAL,
1486 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1487 " in != 0x" RAM_ADDR_FMT, block->idstr,
1488 newsize, block->used_length);
1489 return -EINVAL;
1492 if (block->max_length < newsize) {
1493 error_setg_errno(errp, EINVAL,
1494 "Length too large: %s: 0x" RAM_ADDR_FMT
1495 " > 0x" RAM_ADDR_FMT, block->idstr,
1496 newsize, block->max_length);
1497 return -EINVAL;
1500 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1501 block->used_length = newsize;
1502 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1503 DIRTY_CLIENTS_ALL);
1504 memory_region_set_size(block->mr, newsize);
1505 if (block->resized) {
1506 block->resized(block->idstr, newsize, block->host);
1508 return 0;
1511 /* Called with ram_list.mutex held */
1512 static void dirty_memory_extend(ram_addr_t old_ram_size,
1513 ram_addr_t new_ram_size)
1515 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1516 DIRTY_MEMORY_BLOCK_SIZE);
1517 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1518 DIRTY_MEMORY_BLOCK_SIZE);
1519 int i;
1521 /* Only need to extend if block count increased */
1522 if (new_num_blocks <= old_num_blocks) {
1523 return;
1526 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1527 DirtyMemoryBlocks *old_blocks;
1528 DirtyMemoryBlocks *new_blocks;
1529 int j;
1531 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1532 new_blocks = g_malloc(sizeof(*new_blocks) +
1533 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1535 if (old_num_blocks) {
1536 memcpy(new_blocks->blocks, old_blocks->blocks,
1537 old_num_blocks * sizeof(old_blocks->blocks[0]));
1540 for (j = old_num_blocks; j < new_num_blocks; j++) {
1541 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1544 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1546 if (old_blocks) {
1547 g_free_rcu(old_blocks, rcu);
/*
 * Insert @new_block into ram_list.
 *
 * Assigns the block an offset with find_ram_offset(), allocates host memory
 * for it when new_block->host is not set (through xen_ram_alloc() under
 * Xen, otherwise through phys_mem_alloc), extends the dirty and migration
 * bitmaps if the RAM address space grew, and links the block into the list
 * under the ramlist mutex.
 *
 * On allocation failure @errp is set and the function returns without
 * inserting the block; the caller still owns @new_block.
 */
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    /* Sizes below are in target pages */
    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return;
            }
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    /* 'block' is non-NULL only if the loop stopped at a smaller block */
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    /* host may still be NULL here in the Xen case handled above */
    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
    }
}
1629 #ifdef __linux__
1630 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1631 bool share, const char *mem_path,
1632 Error **errp)
1634 RAMBlock *new_block;
1635 Error *local_err = NULL;
1637 if (xen_enabled()) {
1638 error_setg(errp, "-mem-path not supported with Xen");
1639 return NULL;
1642 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1644 * file_ram_alloc() needs to allocate just like
1645 * phys_mem_alloc, but we haven't bothered to provide
1646 * a hook there.
1648 error_setg(errp,
1649 "-mem-path not supported with this accelerator");
1650 return NULL;
1653 size = HOST_PAGE_ALIGN(size);
1654 new_block = g_malloc0(sizeof(*new_block));
1655 new_block->mr = mr;
1656 new_block->used_length = size;
1657 new_block->max_length = size;
1658 new_block->flags = share ? RAM_SHARED : 0;
1659 new_block->host = file_ram_alloc(new_block, size,
1660 mem_path, errp);
1661 if (!new_block->host) {
1662 g_free(new_block);
1663 return NULL;
1666 ram_block_add(new_block, &local_err);
1667 if (local_err) {
1668 g_free(new_block);
1669 error_propagate(errp, local_err);
1670 return NULL;
1672 return new_block;
1674 #endif
1676 static
1677 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1678 void (*resized)(const char*,
1679 uint64_t length,
1680 void *host),
1681 void *host, bool resizeable,
1682 MemoryRegion *mr, Error **errp)
1684 RAMBlock *new_block;
1685 Error *local_err = NULL;
1687 size = HOST_PAGE_ALIGN(size);
1688 max_size = HOST_PAGE_ALIGN(max_size);
1689 new_block = g_malloc0(sizeof(*new_block));
1690 new_block->mr = mr;
1691 new_block->resized = resized;
1692 new_block->used_length = size;
1693 new_block->max_length = max_size;
1694 assert(max_size >= size);
1695 new_block->fd = -1;
1696 new_block->host = host;
1697 if (host) {
1698 new_block->flags |= RAM_PREALLOC;
1700 if (resizeable) {
1701 new_block->flags |= RAM_RESIZEABLE;
1703 ram_block_add(new_block, &local_err);
1704 if (local_err) {
1705 g_free(new_block);
1706 error_propagate(errp, local_err);
1707 return NULL;
1709 return new_block;
/*
 * Create a fixed-size RAM block of @size bytes for @mr that uses the
 * caller-provided memory @host.  Thin wrapper around
 * qemu_ram_alloc_internal().
 */
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}
/*
 * Create a fixed-size RAM block of @size bytes for @mr with no
 * caller-provided host memory.  Thin wrapper around
 * qemu_ram_alloc_internal().
 */
RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}
/*
 * Create a resizeable RAM block for @mr: @size bytes in use, up to @maxsz.
 * @resized is stored in the block as its resize callback.  Thin wrapper
 * around qemu_ram_alloc_internal().
 */
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}
/*
 * Release the host memory behind @block, then free the RAMBlock itself.
 * Registered as the RCU callback by qemu_ram_free().
 *
 * How the memory is released depends on how the block was set up:
 * RAM_PREALLOC blocks are left alone, Xen blocks have their map cache
 * entry invalidated, file-backed blocks (fd >= 0, non-Windows only) are
 * unmapped and their descriptor closed, and everything else goes through
 * qemu_anon_ram_free().
 */
static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        /* Nothing to release for RAM_PREALLOC blocks */
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        qemu_ram_munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}
/*
 * Unlink @block from ram_list and schedule it for release through
 * call_rcu() with reclaim_ramblock().  A NULL @block is ignored.
 */
void qemu_ram_free(RAMBlock *block)
{
    if (!block) {
        return;
    }

    qemu_mutex_lock_ramlist();
    QLIST_REMOVE_RCU(block, next);
    /* Reset the cached most-recently-used block pointer */
    ram_list.mru_block = NULL;
    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    call_rcu(block, reclaim_ramblock, rcu);
    qemu_mutex_unlock_ramlist();
}
1765 #ifndef _WIN32
1766 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1768 RAMBlock *block;
1769 ram_addr_t offset;
1770 int flags;
1771 void *area, *vaddr;
1773 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1774 offset = addr - block->offset;
1775 if (offset < block->max_length) {
1776 vaddr = ramblock_ptr(block, offset);
1777 if (block->flags & RAM_PREALLOC) {
1779 } else if (xen_enabled()) {
1780 abort();
1781 } else {
1782 flags = MAP_FIXED;
1783 if (block->fd >= 0) {
1784 flags |= (block->flags & RAM_SHARED ?
1785 MAP_SHARED : MAP_PRIVATE);
1786 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1787 flags, block->fd, offset);
1788 } else {
1790 * Remap needs to match alloc. Accelerators that
1791 * set phys_mem_alloc never remap. If they did,
1792 * we'd need a remap hook here.
1794 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1796 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1797 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1798 flags, -1, 0);
1800 if (area != vaddr) {
1801 fprintf(stderr, "Could not remap addr: "
1802 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1803 length, addr);
1804 exit(1);
1806 memory_try_enable_merging(vaddr, length);
1807 qemu_ram_setup_dump(vaddr, length);
1812 #endif /* !_WIN32 */
1814 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1815 * This should not be used for general purpose DMA. Use address_space_map
1816 * or address_space_rw instead. For local memory (e.g. video ram) that the
1817 * device owns, use memory_region_get_ram_ptr.
1819 * Called within RCU critical section.
1821 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1823 RAMBlock *block = ram_block;
1825 if (block == NULL) {
1826 block = qemu_get_ram_block(addr);
1827 addr -= block->offset;
1830 if (xen_enabled() && block->host == NULL) {
1831 /* We need to check if the requested address is in the RAM
1832 * because we don't want to map the entire memory in QEMU.
1833 * In that case just map until the end of the page.
1835 if (block->offset == 0) {
1836 return xen_map_cache(addr, 0, 0);
1839 block->host = xen_map_cache(block->offset, block->max_length, 1);
1841 return ramblock_ptr(block, addr);
1844 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1845 * but takes a size argument.
1847 * Called within RCU critical section.
1849 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1850 hwaddr *size)
1852 RAMBlock *block = ram_block;
1853 if (*size == 0) {
1854 return NULL;
1857 if (block == NULL) {
1858 block = qemu_get_ram_block(addr);
1859 addr -= block->offset;
1861 *size = MIN(*size, block->max_length - addr);
1863 if (xen_enabled() && block->host == NULL) {
1864 /* We need to check if the requested address is in the RAM
1865 * because we don't want to map the entire memory in QEMU.
1866 * In that case just map the requested area.
1868 if (block->offset == 0) {
1869 return xen_map_cache(addr, *size, 1);
1872 block->host = xen_map_cache(block->offset, block->max_length, 1);
1875 return ramblock_ptr(block, addr);
1879 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1880 * in that RAMBlock.
1882 * ptr: Host pointer to look up
1883 * round_offset: If true round the result offset down to a page boundary
1884 * *ram_addr: set to result ram_addr
1885 * *offset: set to result offset within the RAMBlock
1887 * Returns: RAMBlock (or NULL if not found)
1889 * By the time this function returns, the returned pointer is not protected
1890 * by RCU anymore. If the caller is not within an RCU critical section and
1891 * does not hold the iothread lock, it must have other means of protecting the
1892 * pointer, such as a reference to the region that includes the incoming
1893 * ram_addr_t.
1895 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1896 ram_addr_t *offset)
1898 RAMBlock *block;
1899 uint8_t *host = ptr;
1901 if (xen_enabled()) {
1902 ram_addr_t ram_addr;
1903 rcu_read_lock();
1904 ram_addr = xen_ram_addr_from_mapcache(ptr);
1905 block = qemu_get_ram_block(ram_addr);
1906 if (block) {
1907 *offset = ram_addr - block->offset;
1909 rcu_read_unlock();
1910 return block;
1913 rcu_read_lock();
1914 block = atomic_rcu_read(&ram_list.mru_block);
1915 if (block && block->host && host - block->host < block->max_length) {
1916 goto found;
1919 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1920 /* This case append when the block is not mapped. */
1921 if (block->host == NULL) {
1922 continue;
1924 if (host - block->host < block->max_length) {
1925 goto found;
1929 rcu_read_unlock();
1930 return NULL;
1932 found:
1933 *offset = (host - block->host);
1934 if (round_offset) {
1935 *offset &= TARGET_PAGE_MASK;
1937 rcu_read_unlock();
1938 return block;
1942 * Finds the named RAMBlock
1944 * name: The name of RAMBlock to find
1946 * Returns: RAMBlock (or NULL if not found)
1948 RAMBlock *qemu_ram_block_by_name(const char *name)
1950 RAMBlock *block;
1952 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1953 if (!strcmp(name, block->idstr)) {
1954 return block;
1958 return NULL;
1961 /* Some of the softmmu routines need to translate from a host pointer
1962 (typically a TLB entry) back to a ram offset. */
1963 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1965 RAMBlock *block;
1966 ram_addr_t offset;
1968 block = qemu_ram_block_from_host(ptr, false, &offset);
1969 if (!block) {
1970 return RAM_ADDR_INVALID;
1973 return block->offset + offset;
1976 /* Called within RCU critical section. */
1977 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1978 uint64_t val, unsigned size)
1980 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1981 tb_invalidate_phys_page_fast(ram_addr, size);
1983 switch (size) {
1984 case 1:
1985 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1986 break;
1987 case 2:
1988 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1989 break;
1990 case 4:
1991 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1992 break;
1993 default:
1994 abort();
1996 /* Set both VGA and migration bits for simplicity and to remove
1997 * the notdirty callback faster.
1999 cpu_physical_memory_set_dirty_range(ram_addr, size,
2000 DIRTY_CLIENTS_NOCODE);
2001 /* we remove the notdirty callback only if the code has been
2002 flushed */
2003 if (!cpu_physical_memory_is_clean(ram_addr)) {
2004 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
/* Access filter of notdirty_mem_ops: only writes are accepted. */
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}
/*
 * Ops for the io_mem_notdirty region (see io_mem_init): write-only, native
 * endianness, no read handler.
 */
static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
/*
 * Generate a debug exception if a watchpoint has been hit.
 *
 * @offset: offset of the access within the page of cpu->mem_io_vaddr
 * @len:    length of the access
 * @attrs:  transaction attributes, recorded in the watchpoint on a hit
 * @flags:  BP_MEM_READ or BP_MEM_WRITE
 *
 * Operates on current_cpu.  On a hit this does not return normally: it
 * leaves through cpu_loop_exit() or cpu_loop_exit_noexc().
 */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    uint32_t cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            /* Record what kind of access hit and where */
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                /* BP_CPU watchpoints can be vetoed by the CPU class hook */
                if (wp->flags & BP_CPU &&
                    !cc->debug_check_watchpoint(cpu, wp)) {
                    wp->flags &= ~BP_WATCHPOINT_HIT;
                    continue;
                }
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_loop_exit_noexc(cpu);
                }
            }
        } else {
            /* Not matching this access: clear any stale hit flags */
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
2072 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2073 so these check for a hit then pass through to the normal out-of-line
2074 phys routines. */
2075 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2076 unsigned size, MemTxAttrs attrs)
2078 MemTxResult res;
2079 uint64_t data;
2080 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2081 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2083 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2084 switch (size) {
2085 case 1:
2086 data = address_space_ldub(as, addr, attrs, &res);
2087 break;
2088 case 2:
2089 data = address_space_lduw(as, addr, attrs, &res);
2090 break;
2091 case 4:
2092 data = address_space_ldl(as, addr, attrs, &res);
2093 break;
2094 default: abort();
2096 *pdata = data;
2097 return res;
2100 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2101 uint64_t val, unsigned size,
2102 MemTxAttrs attrs)
2104 MemTxResult res;
2105 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2106 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2108 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2109 switch (size) {
2110 case 1:
2111 address_space_stb(as, addr, val, attrs, &res);
2112 break;
2113 case 2:
2114 address_space_stw(as, addr, val, attrs, &res);
2115 break;
2116 case 4:
2117 address_space_stl(as, addr, val, attrs, &res);
2118 break;
2119 default: abort();
2121 return res;
/*
 * Ops for the io_mem_watch region (see io_mem_init): attribute-aware read
 * and write handlers that check watchpoints first; native endianness.
 */
static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2130 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2131 unsigned len, MemTxAttrs attrs)
2133 subpage_t *subpage = opaque;
2134 uint8_t buf[8];
2135 MemTxResult res;
2137 #if defined(DEBUG_SUBPAGE)
2138 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2139 subpage, len, addr);
2140 #endif
2141 res = address_space_read(subpage->as, addr + subpage->base,
2142 attrs, buf, len);
2143 if (res) {
2144 return res;
2146 switch (len) {
2147 case 1:
2148 *data = ldub_p(buf);
2149 return MEMTX_OK;
2150 case 2:
2151 *data = lduw_p(buf);
2152 return MEMTX_OK;
2153 case 4:
2154 *data = ldl_p(buf);
2155 return MEMTX_OK;
2156 case 8:
2157 *data = ldq_p(buf);
2158 return MEMTX_OK;
2159 default:
2160 abort();
2164 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2165 uint64_t value, unsigned len, MemTxAttrs attrs)
2167 subpage_t *subpage = opaque;
2168 uint8_t buf[8];
2170 #if defined(DEBUG_SUBPAGE)
2171 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2172 " value %"PRIx64"\n",
2173 __func__, subpage, len, addr, value);
2174 #endif
2175 switch (len) {
2176 case 1:
2177 stb_p(buf, value);
2178 break;
2179 case 2:
2180 stw_p(buf, value);
2181 break;
2182 case 4:
2183 stl_p(buf, value);
2184 break;
2185 case 8:
2186 stq_p(buf, value);
2187 break;
2188 default:
2189 abort();
2191 return address_space_write(subpage->as, addr + subpage->base,
2192 attrs, buf, len);
2195 static bool subpage_accepts(void *opaque, hwaddr addr,
2196 unsigned len, bool is_write)
2198 subpage_t *subpage = opaque;
2199 #if defined(DEBUG_SUBPAGE)
2200 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2201 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2202 #endif
2204 return address_space_access_valid(subpage->as, addr + subpage->base,
2205 len, is_write);
/*
 * Ops for subpage regions created by subpage_init(): accesses of 1 to 8
 * bytes are forwarded to the underlying address space; native endianness.
 */
static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2219 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2220 uint16_t section)
2222 int idx, eidx;
2224 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2225 return -1;
2226 idx = SUBPAGE_IDX(start);
2227 eidx = SUBPAGE_IDX(end);
2228 #if defined(DEBUG_SUBPAGE)
2229 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2230 __func__, mmio, start, end, idx, eidx, section);
2231 #endif
2232 for (; idx <= eidx; idx++) {
2233 mmio->sub_section[idx] = section;
2236 return 0;
2239 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2241 subpage_t *mmio;
2243 mmio = g_malloc0(sizeof(subpage_t));
2245 mmio->as = as;
2246 mmio->base = base;
2247 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2248 NULL, TARGET_PAGE_SIZE);
2249 mmio->iomem.subpage = true;
2250 #if defined(DEBUG_SUBPAGE)
2251 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2252 mmio, base, TARGET_PAGE_SIZE);
2253 #endif
2254 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2256 return mmio;
2259 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2260 MemoryRegion *mr)
2262 assert(as);
2263 MemoryRegionSection section = {
2264 .address_space = as,
2265 .mr = mr,
2266 .offset_within_address_space = 0,
2267 .offset_within_region = 0,
2268 .size = int128_2_64(),
2271 return phys_section_add(map, &section);
2274 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2276 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2277 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2278 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2279 MemoryRegionSection *sections = d->map.sections;
2281 return sections[index & ~TARGET_PAGE_MASK].mr;
/*
 * Initialise the four special I/O regions: io_mem_rom and
 * io_mem_unassigned (both using unassigned_mem_ops), io_mem_notdirty and
 * io_mem_watch.  Each is created without owner or name and with size
 * UINT64_MAX.
 */
static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          NULL, UINT64_MAX);
}
/*
 * Listener 'begin' hook: start building a new dispatch table for the
 * address space and store it in as->next_dispatch.
 *
 * The four dummy sections must be added in exactly this order, because
 * their indices are asserted to equal the fixed PHYS_SECTION_* constants.
 */
static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    /* Start with an empty page table */
    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}
/* Free the section map of @d and then @d itself; used as an RCU callback. */
static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d);
}
2321 static void mem_commit(MemoryListener *listener)
2323 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2324 AddressSpaceDispatch *cur = as->dispatch;
2325 AddressSpaceDispatch *next = as->next_dispatch;
2327 phys_page_compact_all(next, next->map.nodes_nb);
2329 atomic_rcu_set(&as->dispatch, next);
2330 if (cur) {
2331 call_rcu(cur, address_space_dispatch_free, rcu);
/*
 * Commit hook of a CPUAddressSpace's tcg_as_listener: refresh the cached
 * dispatch pointer of the CPU address space and flush the CPU's TLB.
 */
static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.
     * We reload the dispatch pointer now because cpu_reloading_memory_map()
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    cpuas->memory_dispatch = d;
    tlb_flush(cpuas->cpu, 1);
}
/*
 * Set up dispatch handling for @as: clear the current dispatch pointer and
 * register a memory listener whose hooks (mem_begin, mem_add, mem_commit)
 * rebuild the dispatch table.  region_add and region_nop share mem_add.
 */
void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}
/* Unregister the dispatch listener of @as. */
void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}
2371 void address_space_destroy_dispatch(AddressSpace *as)
2373 AddressSpaceDispatch *d = as->dispatch;
2375 atomic_rcu_set(&as->dispatch, NULL);
2376 if (d) {
2377 call_rcu(d, address_space_dispatch_free, rcu);
/*
 * Create the two root memory regions and their address spaces:
 * "system" (size UINT64_MAX) behind address_space_memory, and "io"
 * (65536 bytes, using unassigned_io_ops) behind address_space_io.
 */
static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");
}
/* Return the root "system" memory region set up by memory_map_init(). */
MemoryRegion *get_system_memory(void)
{
    return system_memory;
}
/* Return the root "io" memory region set up by memory_map_init(). */
MemoryRegion *get_system_io(void)
{
    return system_io;
}
2404 #endif /* !defined(CONFIG_USER_ONLY) */
2406 /* physical memory access (slow version, mainly for debug) */
2407 #if defined(CONFIG_USER_ONLY)
2408 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2409 uint8_t *buf, int len, int is_write)
2411 int l, flags;
2412 target_ulong page;
2413 void * p;
2415 while (len > 0) {
2416 page = addr & TARGET_PAGE_MASK;
2417 l = (page + TARGET_PAGE_SIZE) - addr;
2418 if (l > len)
2419 l = len;
2420 flags = page_get_flags(page);
2421 if (!(flags & PAGE_VALID))
2422 return -1;
2423 if (is_write) {
2424 if (!(flags & PAGE_WRITE))
2425 return -1;
2426 /* XXX: this code should not depend on lock_user */
2427 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2428 return -1;
2429 memcpy(p, buf, l);
2430 unlock_user(p, addr, l);
2431 } else {
2432 if (!(flags & PAGE_READ))
2433 return -1;
2434 /* XXX: this code should not depend on lock_user */
2435 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2436 return -1;
2437 memcpy(buf, p, l);
2438 unlock_user(p, addr, 0);
2440 len -= l;
2441 buf += l;
2442 addr += l;
2444 return 0;
2447 #else
2449 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2450 hwaddr length)
2452 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2453 addr += memory_region_get_ram_addr(mr);
2455 /* No early return if dirty_log_mask is or becomes 0, because
2456 * cpu_physical_memory_set_dirty_range will still call
2457 * xen_modified_memory.
2459 if (dirty_log_mask) {
2460 dirty_log_mask =
2461 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2463 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2464 tb_invalidate_phys_range(addr, addr + length);
2465 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2467 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2470 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2472 unsigned access_size_max = mr->ops->valid.max_access_size;
2474 /* Regions are assumed to support 1-4 byte accesses unless
2475 otherwise specified. */
2476 if (access_size_max == 0) {
2477 access_size_max = 4;
2480 /* Bound the maximum access by the alignment of the address. */
2481 if (!mr->ops->impl.unaligned) {
2482 unsigned align_size_max = addr & -addr;
2483 if (align_size_max != 0 && align_size_max < access_size_max) {
2484 access_size_max = align_size_max;
2488 /* Don't attempt accesses larger than the maximum. */
2489 if (l > access_size_max) {
2490 l = access_size_max;
2492 l = pow2floor(l);
2494 return l;
2497 static bool prepare_mmio_access(MemoryRegion *mr)
2499 bool unlocked = !qemu_mutex_iothread_locked();
2500 bool release_lock = false;
2502 if (unlocked && mr->global_locking) {
2503 qemu_mutex_lock_iothread();
2504 unlocked = false;
2505 release_lock = true;
2507 if (mr->flush_coalesced_mmio) {
2508 if (unlocked) {
2509 qemu_mutex_lock_iothread();
2511 qemu_flush_coalesced_mmio_buffer();
2512 if (unlocked) {
2513 qemu_mutex_unlock_iothread();
2517 return release_lock;
2520 /* Called within RCU critical section. */
2521 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2522 MemTxAttrs attrs,
2523 const uint8_t *buf,
2524 int len, hwaddr addr1,
2525 hwaddr l, MemoryRegion *mr)
2527 uint8_t *ptr;
2528 uint64_t val;
2529 MemTxResult result = MEMTX_OK;
2530 bool release_lock = false;
2532 for (;;) {
2533 if (!memory_access_is_direct(mr, true)) {
2534 release_lock |= prepare_mmio_access(mr);
2535 l = memory_access_size(mr, l, addr1);
2536 /* XXX: could force current_cpu to NULL to avoid
2537 potential bugs */
2538 switch (l) {
2539 case 8:
2540 /* 64 bit write access */
2541 val = ldq_p(buf);
2542 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2543 attrs);
2544 break;
2545 case 4:
2546 /* 32 bit write access */
2547 val = ldl_p(buf);
2548 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2549 attrs);
2550 break;
2551 case 2:
2552 /* 16 bit write access */
2553 val = lduw_p(buf);
2554 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2555 attrs);
2556 break;
2557 case 1:
2558 /* 8 bit write access */
2559 val = ldub_p(buf);
2560 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2561 attrs);
2562 break;
2563 default:
2564 abort();
2566 } else {
2567 /* RAM case */
2568 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2569 memcpy(ptr, buf, l);
2570 invalidate_and_set_dirty(mr, addr1, l);
2573 if (release_lock) {
2574 qemu_mutex_unlock_iothread();
2575 release_lock = false;
2578 len -= l;
2579 buf += l;
2580 addr += l;
2582 if (!len) {
2583 break;
2586 l = len;
2587 mr = address_space_translate(as, addr, &addr1, &l, true);
2590 return result;
2593 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2594 const uint8_t *buf, int len)
2596 hwaddr l;
2597 hwaddr addr1;
2598 MemoryRegion *mr;
2599 MemTxResult result = MEMTX_OK;
2601 if (len > 0) {
2602 rcu_read_lock();
2603 l = len;
2604 mr = address_space_translate(as, addr, &addr1, &l, true);
2605 result = address_space_write_continue(as, addr, attrs, buf, len,
2606 addr1, l, mr);
2607 rcu_read_unlock();
2610 return result;
2613 /* Called within RCU critical section. */
2614 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2615 MemTxAttrs attrs, uint8_t *buf,
2616 int len, hwaddr addr1, hwaddr l,
2617 MemoryRegion *mr)
2619 uint8_t *ptr;
2620 uint64_t val;
2621 MemTxResult result = MEMTX_OK;
2622 bool release_lock = false;
2624 for (;;) {
2625 if (!memory_access_is_direct(mr, false)) {
2626 /* I/O case */
2627 release_lock |= prepare_mmio_access(mr);
2628 l = memory_access_size(mr, l, addr1);
2629 switch (l) {
2630 case 8:
2631 /* 64 bit read access */
2632 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2633 attrs);
2634 stq_p(buf, val);
2635 break;
2636 case 4:
2637 /* 32 bit read access */
2638 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2639 attrs);
2640 stl_p(buf, val);
2641 break;
2642 case 2:
2643 /* 16 bit read access */
2644 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2645 attrs);
2646 stw_p(buf, val);
2647 break;
2648 case 1:
2649 /* 8 bit read access */
2650 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2651 attrs);
2652 stb_p(buf, val);
2653 break;
2654 default:
2655 abort();
2657 } else {
2658 /* RAM case */
2659 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2660 memcpy(buf, ptr, l);
2663 if (release_lock) {
2664 qemu_mutex_unlock_iothread();
2665 release_lock = false;
2668 len -= l;
2669 buf += l;
2670 addr += l;
2672 if (!len) {
2673 break;
2676 l = len;
2677 mr = address_space_translate(as, addr, &addr1, &l, false);
2680 return result;
2683 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2684 MemTxAttrs attrs, uint8_t *buf, int len)
2686 hwaddr l;
2687 hwaddr addr1;
2688 MemoryRegion *mr;
2689 MemTxResult result = MEMTX_OK;
2691 if (len > 0) {
2692 rcu_read_lock();
2693 l = len;
2694 mr = address_space_translate(as, addr, &addr1, &l, false);
2695 result = address_space_read_continue(as, addr, attrs, buf, len,
2696 addr1, l, mr);
2697 rcu_read_unlock();
2700 return result;
2703 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2704 uint8_t *buf, int len, bool is_write)
2706 if (is_write) {
2707 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2708 } else {
2709 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2713 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2714 int len, int is_write)
2716 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2717 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM and mark it dirty */
    FLUSH_CACHE,    /* flush the host icache for the mapped range */
};
2725 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2726 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2728 hwaddr l;
2729 uint8_t *ptr;
2730 hwaddr addr1;
2731 MemoryRegion *mr;
2733 rcu_read_lock();
2734 while (len > 0) {
2735 l = len;
2736 mr = address_space_translate(as, addr, &addr1, &l, true);
2738 if (!(memory_region_is_ram(mr) ||
2739 memory_region_is_romd(mr))) {
2740 l = memory_access_size(mr, l, addr1);
2741 } else {
2742 /* ROM/RAM case */
2743 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2744 switch (type) {
2745 case WRITE_DATA:
2746 memcpy(ptr, buf, l);
2747 invalidate_and_set_dirty(mr, addr1, l);
2748 break;
2749 case FLUSH_CACHE:
2750 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2751 break;
2754 len -= l;
2755 buf += l;
2756 addr += l;
2758 rcu_read_unlock();
2761 /* used for ROM loading : can write in RAM and ROM */
2762 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2763 const uint8_t *buf, int len)
2765 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2768 void cpu_flush_icache_range(hwaddr start, int len)
2771 * This function should do the same thing as an icache flush that was
2772 * triggered from within the guest. For TCG we are always cache coherent,
2773 * so there is no need to flush anything. For KVM / Xen we need to flush
2774 * the host's instruction cache at least.
2776 if (tcg_enabled()) {
2777 return;
2780 cpu_physical_memory_write_rom_internal(&address_space_memory,
2781 start, NULL, len, FLUSH_CACHE);
2784 typedef struct {
2785 MemoryRegion *mr;
2786 void *buffer;
2787 hwaddr addr;
2788 hwaddr len;
2789 bool in_use;
2790 } BounceBuffer;
2792 static BounceBuffer bounce;
2794 typedef struct MapClient {
2795 QEMUBH *bh;
2796 QLIST_ENTRY(MapClient) link;
2797 } MapClient;
2799 QemuMutex map_client_list_lock;
2800 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2801 = QLIST_HEAD_INITIALIZER(map_client_list);
2803 static void cpu_unregister_map_client_do(MapClient *client)
2805 QLIST_REMOVE(client, link);
2806 g_free(client);
2809 static void cpu_notify_map_clients_locked(void)
2811 MapClient *client;
2813 while (!QLIST_EMPTY(&map_client_list)) {
2814 client = QLIST_FIRST(&map_client_list);
2815 qemu_bh_schedule(client->bh);
2816 cpu_unregister_map_client_do(client);
2820 void cpu_register_map_client(QEMUBH *bh)
2822 MapClient *client = g_malloc(sizeof(*client));
2824 qemu_mutex_lock(&map_client_list_lock);
2825 client->bh = bh;
2826 QLIST_INSERT_HEAD(&map_client_list, client, link);
2827 if (!atomic_read(&bounce.in_use)) {
2828 cpu_notify_map_clients_locked();
2830 qemu_mutex_unlock(&map_client_list_lock);
2833 void cpu_exec_init_all(void)
2835 qemu_mutex_init(&ram_list.mutex);
2836 io_mem_init();
2837 memory_map_init();
2838 qemu_mutex_init(&map_client_list_lock);
2841 void cpu_unregister_map_client(QEMUBH *bh)
2843 MapClient *client;
2845 qemu_mutex_lock(&map_client_list_lock);
2846 QLIST_FOREACH(client, &map_client_list, link) {
2847 if (client->bh == bh) {
2848 cpu_unregister_map_client_do(client);
2849 break;
2852 qemu_mutex_unlock(&map_client_list_lock);
2855 static void cpu_notify_map_clients(void)
2857 qemu_mutex_lock(&map_client_list_lock);
2858 cpu_notify_map_clients_locked();
2859 qemu_mutex_unlock(&map_client_list_lock);
2862 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2864 MemoryRegion *mr;
2865 hwaddr l, xlat;
2867 rcu_read_lock();
2868 while (len > 0) {
2869 l = len;
2870 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2871 if (!memory_access_is_direct(mr, is_write)) {
2872 l = memory_access_size(mr, l, addr);
2873 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2874 return false;
2878 len -= l;
2879 addr += l;
2881 rcu_read_unlock();
2882 return true;
2885 /* Map a physical memory region into a host virtual address.
2886 * May map a subset of the requested range, given by and returned in *plen.
2887 * May return NULL if resources needed to perform the mapping are exhausted.
2888 * Use only for reads OR writes - not for read-modify-write operations.
2889 * Use cpu_register_map_client() to know when retrying the map operation is
2890 * likely to succeed.
2892 void *address_space_map(AddressSpace *as,
2893 hwaddr addr,
2894 hwaddr *plen,
2895 bool is_write)
2897 hwaddr len = *plen;
2898 hwaddr done = 0;
2899 hwaddr l, xlat, base;
2900 MemoryRegion *mr, *this_mr;
2901 void *ptr;
2903 if (len == 0) {
2904 return NULL;
2907 l = len;
2908 rcu_read_lock();
2909 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2911 if (!memory_access_is_direct(mr, is_write)) {
2912 if (atomic_xchg(&bounce.in_use, true)) {
2913 rcu_read_unlock();
2914 return NULL;
2916 /* Avoid unbounded allocations */
2917 l = MIN(l, TARGET_PAGE_SIZE);
2918 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2919 bounce.addr = addr;
2920 bounce.len = l;
2922 memory_region_ref(mr);
2923 bounce.mr = mr;
2924 if (!is_write) {
2925 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2926 bounce.buffer, l);
2929 rcu_read_unlock();
2930 *plen = l;
2931 return bounce.buffer;
2934 base = xlat;
2936 for (;;) {
2937 len -= l;
2938 addr += l;
2939 done += l;
2940 if (len == 0) {
2941 break;
2944 l = len;
2945 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2946 if (this_mr != mr || xlat != base + done) {
2947 break;
2951 memory_region_ref(mr);
2952 *plen = done;
2953 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2954 rcu_read_unlock();
2956 return ptr;
2959 /* Unmaps a memory region previously mapped by address_space_map().
2960 * Will also mark the memory as dirty if is_write == 1. access_len gives
2961 * the amount of memory that was actually read or written by the caller.
2963 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2964 int is_write, hwaddr access_len)
2966 if (buffer != bounce.buffer) {
2967 MemoryRegion *mr;
2968 ram_addr_t addr1;
2970 mr = memory_region_from_host(buffer, &addr1);
2971 assert(mr != NULL);
2972 if (is_write) {
2973 invalidate_and_set_dirty(mr, addr1, access_len);
2975 if (xen_enabled()) {
2976 xen_invalidate_map_cache_entry(buffer);
2978 memory_region_unref(mr);
2979 return;
2981 if (is_write) {
2982 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2983 bounce.buffer, access_len);
2985 qemu_vfree(bounce.buffer);
2986 bounce.buffer = NULL;
2987 memory_region_unref(bounce.mr);
2988 atomic_mb_set(&bounce.in_use, false);
2989 cpu_notify_map_clients();
2992 void *cpu_physical_memory_map(hwaddr addr,
2993 hwaddr *plen,
2994 int is_write)
2996 return address_space_map(&address_space_memory, addr, plen, is_write);
2999 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3000 int is_write, hwaddr access_len)
3002 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3005 /* warning: addr must be aligned */
3006 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3007 MemTxAttrs attrs,
3008 MemTxResult *result,
3009 enum device_endian endian)
3011 uint8_t *ptr;
3012 uint64_t val;
3013 MemoryRegion *mr;
3014 hwaddr l = 4;
3015 hwaddr addr1;
3016 MemTxResult r;
3017 bool release_lock = false;
3019 rcu_read_lock();
3020 mr = address_space_translate(as, addr, &addr1, &l, false);
3021 if (l < 4 || !memory_access_is_direct(mr, false)) {
3022 release_lock |= prepare_mmio_access(mr);
3024 /* I/O case */
3025 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3026 #if defined(TARGET_WORDS_BIGENDIAN)
3027 if (endian == DEVICE_LITTLE_ENDIAN) {
3028 val = bswap32(val);
3030 #else
3031 if (endian == DEVICE_BIG_ENDIAN) {
3032 val = bswap32(val);
3034 #endif
3035 } else {
3036 /* RAM case */
3037 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3038 switch (endian) {
3039 case DEVICE_LITTLE_ENDIAN:
3040 val = ldl_le_p(ptr);
3041 break;
3042 case DEVICE_BIG_ENDIAN:
3043 val = ldl_be_p(ptr);
3044 break;
3045 default:
3046 val = ldl_p(ptr);
3047 break;
3049 r = MEMTX_OK;
3051 if (result) {
3052 *result = r;
3054 if (release_lock) {
3055 qemu_mutex_unlock_iothread();
3057 rcu_read_unlock();
3058 return val;
3061 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3062 MemTxAttrs attrs, MemTxResult *result)
3064 return address_space_ldl_internal(as, addr, attrs, result,
3065 DEVICE_NATIVE_ENDIAN);
3068 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3069 MemTxAttrs attrs, MemTxResult *result)
3071 return address_space_ldl_internal(as, addr, attrs, result,
3072 DEVICE_LITTLE_ENDIAN);
3075 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3076 MemTxAttrs attrs, MemTxResult *result)
3078 return address_space_ldl_internal(as, addr, attrs, result,
3079 DEVICE_BIG_ENDIAN);
3082 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3084 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3087 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3089 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3092 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3094 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3097 /* warning: addr must be aligned */
3098 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3099 MemTxAttrs attrs,
3100 MemTxResult *result,
3101 enum device_endian endian)
3103 uint8_t *ptr;
3104 uint64_t val;
3105 MemoryRegion *mr;
3106 hwaddr l = 8;
3107 hwaddr addr1;
3108 MemTxResult r;
3109 bool release_lock = false;
3111 rcu_read_lock();
3112 mr = address_space_translate(as, addr, &addr1, &l,
3113 false);
3114 if (l < 8 || !memory_access_is_direct(mr, false)) {
3115 release_lock |= prepare_mmio_access(mr);
3117 /* I/O case */
3118 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3119 #if defined(TARGET_WORDS_BIGENDIAN)
3120 if (endian == DEVICE_LITTLE_ENDIAN) {
3121 val = bswap64(val);
3123 #else
3124 if (endian == DEVICE_BIG_ENDIAN) {
3125 val = bswap64(val);
3127 #endif
3128 } else {
3129 /* RAM case */
3130 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3131 switch (endian) {
3132 case DEVICE_LITTLE_ENDIAN:
3133 val = ldq_le_p(ptr);
3134 break;
3135 case DEVICE_BIG_ENDIAN:
3136 val = ldq_be_p(ptr);
3137 break;
3138 default:
3139 val = ldq_p(ptr);
3140 break;
3142 r = MEMTX_OK;
3144 if (result) {
3145 *result = r;
3147 if (release_lock) {
3148 qemu_mutex_unlock_iothread();
3150 rcu_read_unlock();
3151 return val;
3154 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3155 MemTxAttrs attrs, MemTxResult *result)
3157 return address_space_ldq_internal(as, addr, attrs, result,
3158 DEVICE_NATIVE_ENDIAN);
3161 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3162 MemTxAttrs attrs, MemTxResult *result)
3164 return address_space_ldq_internal(as, addr, attrs, result,
3165 DEVICE_LITTLE_ENDIAN);
3168 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3169 MemTxAttrs attrs, MemTxResult *result)
3171 return address_space_ldq_internal(as, addr, attrs, result,
3172 DEVICE_BIG_ENDIAN);
3175 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3177 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3180 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3182 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3185 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3187 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3190 /* XXX: optimize */
3191 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3192 MemTxAttrs attrs, MemTxResult *result)
3194 uint8_t val;
3195 MemTxResult r;
3197 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3198 if (result) {
3199 *result = r;
3201 return val;
3204 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3206 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3209 /* warning: addr must be aligned */
3210 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3211 hwaddr addr,
3212 MemTxAttrs attrs,
3213 MemTxResult *result,
3214 enum device_endian endian)
3216 uint8_t *ptr;
3217 uint64_t val;
3218 MemoryRegion *mr;
3219 hwaddr l = 2;
3220 hwaddr addr1;
3221 MemTxResult r;
3222 bool release_lock = false;
3224 rcu_read_lock();
3225 mr = address_space_translate(as, addr, &addr1, &l,
3226 false);
3227 if (l < 2 || !memory_access_is_direct(mr, false)) {
3228 release_lock |= prepare_mmio_access(mr);
3230 /* I/O case */
3231 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3232 #if defined(TARGET_WORDS_BIGENDIAN)
3233 if (endian == DEVICE_LITTLE_ENDIAN) {
3234 val = bswap16(val);
3236 #else
3237 if (endian == DEVICE_BIG_ENDIAN) {
3238 val = bswap16(val);
3240 #endif
3241 } else {
3242 /* RAM case */
3243 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3244 switch (endian) {
3245 case DEVICE_LITTLE_ENDIAN:
3246 val = lduw_le_p(ptr);
3247 break;
3248 case DEVICE_BIG_ENDIAN:
3249 val = lduw_be_p(ptr);
3250 break;
3251 default:
3252 val = lduw_p(ptr);
3253 break;
3255 r = MEMTX_OK;
3257 if (result) {
3258 *result = r;
3260 if (release_lock) {
3261 qemu_mutex_unlock_iothread();
3263 rcu_read_unlock();
3264 return val;
3267 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3268 MemTxAttrs attrs, MemTxResult *result)
3270 return address_space_lduw_internal(as, addr, attrs, result,
3271 DEVICE_NATIVE_ENDIAN);
3274 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3275 MemTxAttrs attrs, MemTxResult *result)
3277 return address_space_lduw_internal(as, addr, attrs, result,
3278 DEVICE_LITTLE_ENDIAN);
3281 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3282 MemTxAttrs attrs, MemTxResult *result)
3284 return address_space_lduw_internal(as, addr, attrs, result,
3285 DEVICE_BIG_ENDIAN);
3288 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3290 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3293 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3295 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3298 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3300 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3303 /* warning: addr must be aligned. The ram page is not masked as dirty
3304 and the code inside is not invalidated. It is useful if the dirty
3305 bits are used to track modified PTEs */
3306 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3307 MemTxAttrs attrs, MemTxResult *result)
3309 uint8_t *ptr;
3310 MemoryRegion *mr;
3311 hwaddr l = 4;
3312 hwaddr addr1;
3313 MemTxResult r;
3314 uint8_t dirty_log_mask;
3315 bool release_lock = false;
3317 rcu_read_lock();
3318 mr = address_space_translate(as, addr, &addr1, &l,
3319 true);
3320 if (l < 4 || !memory_access_is_direct(mr, true)) {
3321 release_lock |= prepare_mmio_access(mr);
3323 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3324 } else {
3325 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3326 stl_p(ptr, val);
3328 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3329 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3330 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3331 4, dirty_log_mask);
3332 r = MEMTX_OK;
3334 if (result) {
3335 *result = r;
3337 if (release_lock) {
3338 qemu_mutex_unlock_iothread();
3340 rcu_read_unlock();
3343 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3345 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3348 /* warning: addr must be aligned */
3349 static inline void address_space_stl_internal(AddressSpace *as,
3350 hwaddr addr, uint32_t val,
3351 MemTxAttrs attrs,
3352 MemTxResult *result,
3353 enum device_endian endian)
3355 uint8_t *ptr;
3356 MemoryRegion *mr;
3357 hwaddr l = 4;
3358 hwaddr addr1;
3359 MemTxResult r;
3360 bool release_lock = false;
3362 rcu_read_lock();
3363 mr = address_space_translate(as, addr, &addr1, &l,
3364 true);
3365 if (l < 4 || !memory_access_is_direct(mr, true)) {
3366 release_lock |= prepare_mmio_access(mr);
3368 #if defined(TARGET_WORDS_BIGENDIAN)
3369 if (endian == DEVICE_LITTLE_ENDIAN) {
3370 val = bswap32(val);
3372 #else
3373 if (endian == DEVICE_BIG_ENDIAN) {
3374 val = bswap32(val);
3376 #endif
3377 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3378 } else {
3379 /* RAM case */
3380 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3381 switch (endian) {
3382 case DEVICE_LITTLE_ENDIAN:
3383 stl_le_p(ptr, val);
3384 break;
3385 case DEVICE_BIG_ENDIAN:
3386 stl_be_p(ptr, val);
3387 break;
3388 default:
3389 stl_p(ptr, val);
3390 break;
3392 invalidate_and_set_dirty(mr, addr1, 4);
3393 r = MEMTX_OK;
3395 if (result) {
3396 *result = r;
3398 if (release_lock) {
3399 qemu_mutex_unlock_iothread();
3401 rcu_read_unlock();
3404 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3405 MemTxAttrs attrs, MemTxResult *result)
3407 address_space_stl_internal(as, addr, val, attrs, result,
3408 DEVICE_NATIVE_ENDIAN);
3411 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3412 MemTxAttrs attrs, MemTxResult *result)
3414 address_space_stl_internal(as, addr, val, attrs, result,
3415 DEVICE_LITTLE_ENDIAN);
3418 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3419 MemTxAttrs attrs, MemTxResult *result)
3421 address_space_stl_internal(as, addr, val, attrs, result,
3422 DEVICE_BIG_ENDIAN);
3425 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3427 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3430 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3432 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3435 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3437 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3440 /* XXX: optimize */
3441 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3442 MemTxAttrs attrs, MemTxResult *result)
3444 uint8_t v = val;
3445 MemTxResult r;
3447 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3448 if (result) {
3449 *result = r;
3453 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3455 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3458 /* warning: addr must be aligned */
3459 static inline void address_space_stw_internal(AddressSpace *as,
3460 hwaddr addr, uint32_t val,
3461 MemTxAttrs attrs,
3462 MemTxResult *result,
3463 enum device_endian endian)
3465 uint8_t *ptr;
3466 MemoryRegion *mr;
3467 hwaddr l = 2;
3468 hwaddr addr1;
3469 MemTxResult r;
3470 bool release_lock = false;
3472 rcu_read_lock();
3473 mr = address_space_translate(as, addr, &addr1, &l, true);
3474 if (l < 2 || !memory_access_is_direct(mr, true)) {
3475 release_lock |= prepare_mmio_access(mr);
3477 #if defined(TARGET_WORDS_BIGENDIAN)
3478 if (endian == DEVICE_LITTLE_ENDIAN) {
3479 val = bswap16(val);
3481 #else
3482 if (endian == DEVICE_BIG_ENDIAN) {
3483 val = bswap16(val);
3485 #endif
3486 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3487 } else {
3488 /* RAM case */
3489 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3490 switch (endian) {
3491 case DEVICE_LITTLE_ENDIAN:
3492 stw_le_p(ptr, val);
3493 break;
3494 case DEVICE_BIG_ENDIAN:
3495 stw_be_p(ptr, val);
3496 break;
3497 default:
3498 stw_p(ptr, val);
3499 break;
3501 invalidate_and_set_dirty(mr, addr1, 2);
3502 r = MEMTX_OK;
3504 if (result) {
3505 *result = r;
3507 if (release_lock) {
3508 qemu_mutex_unlock_iothread();
3510 rcu_read_unlock();
3513 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3514 MemTxAttrs attrs, MemTxResult *result)
3516 address_space_stw_internal(as, addr, val, attrs, result,
3517 DEVICE_NATIVE_ENDIAN);
3520 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3521 MemTxAttrs attrs, MemTxResult *result)
3523 address_space_stw_internal(as, addr, val, attrs, result,
3524 DEVICE_LITTLE_ENDIAN);
3527 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3528 MemTxAttrs attrs, MemTxResult *result)
3530 address_space_stw_internal(as, addr, val, attrs, result,
3531 DEVICE_BIG_ENDIAN);
3534 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3536 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3539 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3541 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3544 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3546 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3549 /* XXX: optimize */
3550 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3551 MemTxAttrs attrs, MemTxResult *result)
3553 MemTxResult r;
3554 val = tswap64(val);
3555 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3556 if (result) {
3557 *result = r;
3561 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3562 MemTxAttrs attrs, MemTxResult *result)
3564 MemTxResult r;
3565 val = cpu_to_le64(val);
3566 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3567 if (result) {
3568 *result = r;
3571 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3572 MemTxAttrs attrs, MemTxResult *result)
3574 MemTxResult r;
3575 val = cpu_to_be64(val);
3576 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3577 if (result) {
3578 *result = r;
3582 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3584 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3587 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3589 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3592 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3594 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3597 /* virtual memory access for debug (includes writing to ROM) */
3598 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3599 uint8_t *buf, int len, int is_write)
3601 int l;
3602 hwaddr phys_addr;
3603 target_ulong page;
3605 while (len > 0) {
3606 int asidx;
3607 MemTxAttrs attrs;
3609 page = addr & TARGET_PAGE_MASK;
3610 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3611 asidx = cpu_asidx_from_attrs(cpu, attrs);
3612 /* if no physical page mapped, return an error */
3613 if (phys_addr == -1)
3614 return -1;
3615 l = (page + TARGET_PAGE_SIZE) - addr;
3616 if (l > len)
3617 l = len;
3618 phys_addr += (addr & ~TARGET_PAGE_MASK);
3619 if (is_write) {
3620 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3621 phys_addr, buf, l);
3622 } else {
3623 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3624 MEMTXATTRS_UNSPECIFIED,
3625 buf, l, 0);
3627 len -= l;
3628 buf += l;
3629 addr += l;
3631 return 0;
3635 * Allows code that needs to deal with migration bitmaps etc to still be built
3636 * target independent.
3638 size_t qemu_target_page_bits(void)
3640 return TARGET_PAGE_BITS;
3643 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true iff TARGET_WORDS_BIGENDIAN is defined at build time.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
    bool big_endian;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#else
    big_endian = false;
#endif
    return big_endian;
}
3659 #ifndef CONFIG_USER_ONLY
3660 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3662 MemoryRegion*mr;
3663 hwaddr l = 1;
3664 bool res;
3666 rcu_read_lock();
3667 mr = address_space_translate(&address_space_memory,
3668 phys_addr, &phys_addr, &l, false);
3670 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3671 rcu_read_unlock();
3672 return res;
3675 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3677 RAMBlock *block;
3678 int ret = 0;
3680 rcu_read_lock();
3681 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3682 ret = func(block->idstr, block->host, block->offset,
3683 block->used_length, opaque);
3684 if (ret) {
3685 break;
3688 rcu_read_unlock();
3689 return ret;
3691 #endif