timer.h: fix inconsistency between comment and function prototype
[qemu/ar7.git] / exec.c
blob80398b038f9c71ba392a47a62f2e356857582818
1 /*
2 * Virtual page mapping
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #ifndef _WIN32
22 #endif
24 #include "qemu/cutils.h"
25 #include "cpu.h"
26 #include "exec/exec-all.h"
27 #include "tcg.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
32 #endif
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include "qemu.h"
40 #else /* !CONFIG_USER_ONLY */
41 #include "hw/hw.h"
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
57 #include "exec/log.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
62 #ifndef _WIN32
63 #include "qemu/mmap-alloc.h"
64 #endif
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
94 #endif
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
98 cpu_exec() */
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
103 int use_icount;
105 #if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the multi-level physical page map. */
struct PhysPageEntry {
    /* Number of levels to skip to reach the next one (in units of
     * L2_SIZE); 0 marks a leaf.
     */
    uint32_t skip : 6;
    /* Leaf (!skip): index into phys_sections.
     * Otherwise (skip): index into phys_map_nodes.
     */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field: "no node allocated". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of levels needed to cover ADDR_SPACE_BITS - TARGET_PAGE_BITS
 * index bits at P_L2_BITS per level (rounded up).
 */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
129 struct rcu_head rcu;
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
133 unsigned nodes_nb;
134 unsigned nodes_nb_alloc;
135 Node *nodes;
136 MemoryRegionSection *sections;
137 } PhysPageMap;
139 struct AddressSpaceDispatch {
140 struct rcu_head rcu;
142 MemoryRegionSection *mru_section;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
147 PhysPageMap map;
148 AddressSpace *as;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
153 MemoryRegion iomem;
154 AddressSpace *as;
155 hwaddr base;
156 uint16_t sub_section[TARGET_PAGE_SIZE];
157 } subpage_t;
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
178 CPUState *cpu;
179 AddressSpace *as;
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
184 #endif
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 static unsigned alloc_hint = 16;
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 alloc_hint = map->nodes_nb_alloc;
199 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
201 unsigned i;
202 uint32_t ret;
203 PhysPageEntry e;
204 PhysPageEntry *p;
206 ret = map->nodes_nb++;
207 p = map->nodes[ret];
208 assert(ret != PHYS_MAP_NODE_NIL);
209 assert(ret != map->nodes_nb_alloc);
211 e.skip = leaf ? 0 : 1;
212 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
213 for (i = 0; i < P_L2_SIZE; ++i) {
214 memcpy(&p[i], &e, sizeof(e));
216 return ret;
219 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
220 hwaddr *index, hwaddr *nb, uint16_t leaf,
221 int level)
223 PhysPageEntry *p;
224 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
226 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
227 lp->ptr = phys_map_node_alloc(map, level == 0);
229 p = map->nodes[lp->ptr];
230 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
232 while (*nb && lp < &p[P_L2_SIZE]) {
233 if ((*index & (step - 1)) == 0 && *nb >= step) {
234 lp->skip = 0;
235 lp->ptr = leaf;
236 *index += step;
237 *nb -= step;
238 } else {
239 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
241 ++lp;
245 static void phys_page_set(AddressSpaceDispatch *d,
246 hwaddr index, hwaddr nb,
247 uint16_t leaf)
249 /* Wildly overreserve - it doesn't matter much. */
250 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
252 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
255 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
256 * and update our entry so we can skip it and go directly to the destination.
258 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
260 unsigned valid_ptr = P_L2_SIZE;
261 int valid = 0;
262 PhysPageEntry *p;
263 int i;
265 if (lp->ptr == PHYS_MAP_NODE_NIL) {
266 return;
269 p = nodes[lp->ptr];
270 for (i = 0; i < P_L2_SIZE; i++) {
271 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
272 continue;
275 valid_ptr = i;
276 valid++;
277 if (p[i].skip) {
278 phys_page_compact(&p[i], nodes, compacted);
282 /* We can only compress if there's only one child. */
283 if (valid != 1) {
284 return;
287 assert(valid_ptr < P_L2_SIZE);
289 /* Don't compress if it won't fit in the # of bits we have. */
290 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
291 return;
294 lp->ptr = p[valid_ptr].ptr;
295 if (!p[valid_ptr].skip) {
296 /* If our only child is a leaf, make this a leaf. */
297 /* By design, we should have made this node a leaf to begin with so we
298 * should never reach here.
299 * But since it's so simple to handle this, let's do it just in case we
300 * change this rule.
302 lp->skip = 0;
303 } else {
304 lp->skip += p[valid_ptr].skip;
308 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
310 DECLARE_BITMAP(compacted, nodes_nb);
312 if (d->phys_map.skip) {
313 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
317 static inline bool section_covers_addr(const MemoryRegionSection *section,
318 hwaddr addr)
320 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
321 * the section must cover the entire address space.
323 return section->size.hi ||
324 range_covers_byte(section->offset_within_address_space,
325 section->size.lo, addr);
328 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
329 Node *nodes, MemoryRegionSection *sections)
331 PhysPageEntry *p;
332 hwaddr index = addr >> TARGET_PAGE_BITS;
333 int i;
335 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
336 if (lp.ptr == PHYS_MAP_NODE_NIL) {
337 return &sections[PHYS_SECTION_UNASSIGNED];
339 p = nodes[lp.ptr];
340 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
343 if (section_covers_addr(&sections[lp.ptr], addr)) {
344 return &sections[lp.ptr];
345 } else {
346 return &sections[PHYS_SECTION_UNASSIGNED];
350 bool memory_region_is_unassigned(MemoryRegion *mr)
352 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
353 && mr != &io_mem_watch;
356 /* Called from RCU critical section */
357 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
358 hwaddr addr,
359 bool resolve_subpage)
361 MemoryRegionSection *section = atomic_read(&d->mru_section);
362 subpage_t *subpage;
363 bool update;
365 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
366 section_covers_addr(section, addr)) {
367 update = false;
368 } else {
369 section = phys_page_find(d->phys_map, addr, d->map.nodes,
370 d->map.sections);
371 update = true;
373 if (resolve_subpage && section->mr->subpage) {
374 subpage = container_of(section->mr, subpage_t, iomem);
375 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
377 if (update) {
378 atomic_set(&d->mru_section, section);
380 return section;
383 /* Called from RCU critical section */
384 static MemoryRegionSection *
385 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
386 hwaddr *plen, bool resolve_subpage)
388 MemoryRegionSection *section;
389 MemoryRegion *mr;
390 Int128 diff;
392 section = address_space_lookup_region(d, addr, resolve_subpage);
393 /* Compute offset within MemoryRegionSection */
394 addr -= section->offset_within_address_space;
396 /* Compute offset within MemoryRegion */
397 *xlat = addr + section->offset_within_region;
399 mr = section->mr;
401 /* MMIO registers can be expected to perform full-width accesses based only
402 * on their address, without considering adjacent registers that could
403 * decode to completely different MemoryRegions. When such registers
404 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
405 * regions overlap wildly. For this reason we cannot clamp the accesses
406 * here.
408 * If the length is small (as is the case for address_space_ldl/stl),
409 * everything works fine. If the incoming length is large, however,
410 * the caller really has to do the clamping through memory_access_size.
412 if (memory_region_is_ram(mr)) {
413 diff = int128_sub(section->size, int128_make64(addr));
414 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
416 return section;
419 /* Called from RCU critical section */
420 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen,
422 bool is_write)
424 IOMMUTLBEntry iotlb;
425 MemoryRegionSection *section;
426 MemoryRegion *mr;
428 for (;;) {
429 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
430 section = address_space_translate_internal(d, addr, &addr, plen, true);
431 mr = section->mr;
433 if (!mr->iommu_ops) {
434 break;
437 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
438 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
439 | (addr & iotlb.addr_mask));
440 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
441 if (!(iotlb.perm & (1 << is_write))) {
442 mr = &io_mem_unassigned;
443 break;
446 as = iotlb.target_as;
449 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
450 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
451 *plen = MIN(page, *plen);
454 *xlat = addr;
455 return mr;
458 /* Called from RCU critical section */
459 MemoryRegionSection *
460 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
461 hwaddr *xlat, hwaddr *plen)
463 MemoryRegionSection *section;
464 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
466 section = address_space_translate_internal(d, addr, xlat, plen, false);
468 assert(!section->mr->iommu_ops);
469 return section;
471 #endif
473 #if !defined(CONFIG_USER_ONLY)
475 static int cpu_common_post_load(void *opaque, int version_id)
477 CPUState *cpu = opaque;
479 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
480 version_id is increased. */
481 cpu->interrupt_request &= ~0x01;
482 tlb_flush(cpu, 1);
484 return 0;
487 static int cpu_common_pre_load(void *opaque)
489 CPUState *cpu = opaque;
491 cpu->exception_index = -1;
493 return 0;
496 static bool cpu_common_exception_index_needed(void *opaque)
498 CPUState *cpu = opaque;
500 return tcg_enabled() && cpu->exception_index != -1;
503 static const VMStateDescription vmstate_cpu_common_exception_index = {
504 .name = "cpu_common/exception_index",
505 .version_id = 1,
506 .minimum_version_id = 1,
507 .needed = cpu_common_exception_index_needed,
508 .fields = (VMStateField[]) {
509 VMSTATE_INT32(exception_index, CPUState),
510 VMSTATE_END_OF_LIST()
514 static bool cpu_common_crash_occurred_needed(void *opaque)
516 CPUState *cpu = opaque;
518 return cpu->crash_occurred;
521 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
522 .name = "cpu_common/crash_occurred",
523 .version_id = 1,
524 .minimum_version_id = 1,
525 .needed = cpu_common_crash_occurred_needed,
526 .fields = (VMStateField[]) {
527 VMSTATE_BOOL(crash_occurred, CPUState),
528 VMSTATE_END_OF_LIST()
532 const VMStateDescription vmstate_cpu_common = {
533 .name = "cpu_common",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .pre_load = cpu_common_pre_load,
537 .post_load = cpu_common_post_load,
538 .fields = (VMStateField[]) {
539 VMSTATE_UINT32(halted, CPUState),
540 VMSTATE_UINT32(interrupt_request, CPUState),
541 VMSTATE_END_OF_LIST()
543 .subsections = (const VMStateDescription*[]) {
544 &vmstate_cpu_common_exception_index,
545 &vmstate_cpu_common_crash_occurred,
546 NULL
550 #endif
552 CPUState *qemu_get_cpu(int index)
554 CPUState *cpu;
556 CPU_FOREACH(cpu) {
557 if (cpu->cpu_index == index) {
558 return cpu;
562 return NULL;
565 #if !defined(CONFIG_USER_ONLY)
566 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
568 CPUAddressSpace *newas;
570 /* Target code should have set num_ases before calling us */
571 assert(asidx < cpu->num_ases);
573 if (asidx == 0) {
574 /* address space 0 gets the convenience alias */
575 cpu->as = as;
578 /* KVM cannot currently support multiple address spaces. */
579 assert(asidx == 0 || !kvm_enabled());
581 if (!cpu->cpu_ases) {
582 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
585 newas = &cpu->cpu_ases[asidx];
586 newas->cpu = cpu;
587 newas->as = as;
588 if (tcg_enabled()) {
589 newas->tcg_as_listener.commit = tcg_commit;
590 memory_listener_register(&newas->tcg_as_listener, as);
594 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
596 /* Return the AddressSpace corresponding to the specified index */
597 return cpu->cpu_ases[asidx].as;
599 #endif
601 static bool cpu_index_auto_assigned;
603 static int cpu_get_free_index(void)
605 CPUState *some_cpu;
606 int cpu_index = 0;
608 cpu_index_auto_assigned = true;
609 CPU_FOREACH(some_cpu) {
610 cpu_index++;
612 return cpu_index;
615 void cpu_exec_exit(CPUState *cpu)
617 CPUClass *cc = CPU_GET_CLASS(cpu);
619 cpu_list_lock();
620 if (cpu->node.tqe_prev == NULL) {
621 /* there is nothing to undo since cpu_exec_init() hasn't been called */
622 cpu_list_unlock();
623 return;
626 assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ)));
628 QTAILQ_REMOVE(&cpus, cpu, node);
629 cpu->node.tqe_prev = NULL;
630 cpu->cpu_index = UNASSIGNED_CPU_INDEX;
631 cpu_list_unlock();
633 if (cc->vmsd != NULL) {
634 vmstate_unregister(NULL, cc->vmsd, cpu);
636 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
637 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
641 void cpu_exec_init(CPUState *cpu, Error **errp)
643 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
644 Error *local_err ATTRIBUTE_UNUSED = NULL;
646 cpu->as = NULL;
647 cpu->num_ases = 0;
649 #ifndef CONFIG_USER_ONLY
650 cpu->thread_id = qemu_get_thread_id();
652 /* This is a softmmu CPU object, so create a property for it
653 * so users can wire up its memory. (This can't go in qom/cpu.c
654 * because that file is compiled only once for both user-mode
655 * and system builds.) The default if no link is set up is to use
656 * the system address space.
658 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
659 (Object **)&cpu->memory,
660 qdev_prop_allow_set_link_before_realize,
661 OBJ_PROP_LINK_UNREF_ON_RELEASE,
662 &error_abort);
663 cpu->memory = system_memory;
664 object_ref(OBJECT(cpu->memory));
665 #endif
667 cpu_list_lock();
668 if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
669 cpu->cpu_index = cpu_get_free_index();
670 assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
671 } else {
672 assert(!cpu_index_auto_assigned);
674 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
675 cpu_list_unlock();
677 #ifndef CONFIG_USER_ONLY
678 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
679 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
681 if (cc->vmsd != NULL) {
682 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
684 #endif
687 #if defined(CONFIG_USER_ONLY)
688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
690 tb_invalidate_phys_page_range(pc, pc + 1, 0);
692 #else
693 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
695 MemTxAttrs attrs;
696 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
697 int asidx = cpu_asidx_from_attrs(cpu, attrs);
698 if (phys != -1) {
699 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
700 phys | (pc & ~TARGET_PAGE_MASK));
703 #endif
705 #if defined(CONFIG_USER_ONLY)
706 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
711 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
712 int flags)
714 return -ENOSYS;
717 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
721 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
722 int flags, CPUWatchpoint **watchpoint)
724 return -ENOSYS;
726 #else
727 /* Add a watchpoint. */
728 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
729 int flags, CPUWatchpoint **watchpoint)
731 CPUWatchpoint *wp;
733 /* forbid ranges which are empty or run off the end of the address space */
734 if (len == 0 || (addr + len - 1) < addr) {
735 error_report("tried to set invalid watchpoint at %"
736 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
737 return -EINVAL;
739 wp = g_malloc(sizeof(*wp));
741 wp->vaddr = addr;
742 wp->len = len;
743 wp->flags = flags;
745 /* keep all GDB-injected watchpoints in front */
746 if (flags & BP_GDB) {
747 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
748 } else {
749 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
752 tlb_flush_page(cpu, addr);
754 if (watchpoint)
755 *watchpoint = wp;
756 return 0;
759 /* Remove a specific watchpoint. */
760 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
761 int flags)
763 CPUWatchpoint *wp;
765 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
766 if (addr == wp->vaddr && len == wp->len
767 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
768 cpu_watchpoint_remove_by_ref(cpu, wp);
769 return 0;
772 return -ENOENT;
775 /* Remove a specific watchpoint by reference. */
776 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
778 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
780 tlb_flush_page(cpu, watchpoint->vaddr);
782 g_free(watchpoint);
785 /* Remove all matching watchpoints. */
786 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
788 CPUWatchpoint *wp, *next;
790 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
791 if (wp->flags & mask) {
792 cpu_watchpoint_remove_by_ref(cpu, wp);
797 /* Return true if this watchpoint address matches the specified
798 * access (ie the address range covered by the watchpoint overlaps
799 * partially or completely with the address range covered by the
800 * access).
802 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
803 vaddr addr,
804 vaddr len)
806 /* We know the lengths are non-zero, but a little caution is
807 * required to avoid errors in the case where the range ends
808 * exactly at the top of the address space and so addr + len
809 * wraps round to zero.
811 vaddr wpend = wp->vaddr + wp->len - 1;
812 vaddr addrend = addr + len - 1;
814 return !(addr > wpend || wp->vaddr > addrend);
817 #endif
819 /* Add a breakpoint. */
820 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
821 CPUBreakpoint **breakpoint)
823 CPUBreakpoint *bp;
825 bp = g_malloc(sizeof(*bp));
827 bp->pc = pc;
828 bp->flags = flags;
830 /* keep all GDB-injected breakpoints in front */
831 if (flags & BP_GDB) {
832 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
833 } else {
834 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
837 breakpoint_invalidate(cpu, pc);
839 if (breakpoint) {
840 *breakpoint = bp;
842 return 0;
845 /* Remove a specific breakpoint. */
846 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
848 CPUBreakpoint *bp;
850 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
851 if (bp->pc == pc && bp->flags == flags) {
852 cpu_breakpoint_remove_by_ref(cpu, bp);
853 return 0;
856 return -ENOENT;
859 /* Remove a specific breakpoint by reference. */
860 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
862 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
864 breakpoint_invalidate(cpu, breakpoint->pc);
866 g_free(breakpoint);
869 /* Remove all matching breakpoints. */
870 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
872 CPUBreakpoint *bp, *next;
874 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
875 if (bp->flags & mask) {
876 cpu_breakpoint_remove_by_ref(cpu, bp);
881 /* enable or disable single step mode. EXCP_DEBUG is returned by the
882 CPU loop after each instruction */
883 void cpu_single_step(CPUState *cpu, int enabled)
885 if (cpu->singlestep_enabled != enabled) {
886 cpu->singlestep_enabled = enabled;
887 if (kvm_enabled()) {
888 kvm_update_guest_debug(cpu, 0);
889 } else {
890 /* must flush all the translated code to avoid inconsistencies */
891 /* XXX: only flush what is necessary */
892 tb_flush(cpu);
897 void cpu_abort(CPUState *cpu, const char *fmt, ...)
899 va_list ap;
900 va_list ap2;
902 va_start(ap, fmt);
903 va_copy(ap2, ap);
904 fprintf(stderr, "qemu: fatal: ");
905 vfprintf(stderr, fmt, ap);
906 fprintf(stderr, "\n");
907 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
908 if (qemu_log_separate()) {
909 qemu_log("qemu: fatal: ");
910 qemu_log_vprintf(fmt, ap2);
911 qemu_log("\n");
912 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
913 qemu_log_flush();
914 qemu_log_close();
916 va_end(ap2);
917 va_end(ap);
918 replay_finish();
919 #if defined(CONFIG_USER_ONLY)
921 struct sigaction act;
922 sigfillset(&act.sa_mask);
923 act.sa_handler = SIG_DFL;
924 sigaction(SIGABRT, &act, NULL);
926 #endif
927 abort();
930 #if !defined(CONFIG_USER_ONLY)
931 /* Called from RCU critical section */
932 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
934 RAMBlock *block;
936 block = atomic_rcu_read(&ram_list.mru_block);
937 if (block && addr - block->offset < block->max_length) {
938 return block;
940 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
941 if (addr - block->offset < block->max_length) {
942 goto found;
946 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
947 abort();
949 found:
950 /* It is safe to write mru_block outside the iothread lock. This
951 * is what happens:
953 * mru_block = xxx
954 * rcu_read_unlock()
955 * xxx removed from list
956 * rcu_read_lock()
957 * read mru_block
958 * mru_block = NULL;
959 * call_rcu(reclaim_ramblock, xxx);
960 * rcu_read_unlock()
962 * atomic_rcu_set is not needed here. The block was already published
963 * when it was placed into the list. Here we're just making an extra
964 * copy of the pointer.
966 ram_list.mru_block = block;
967 return block;
970 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
972 CPUState *cpu;
973 ram_addr_t start1;
974 RAMBlock *block;
975 ram_addr_t end;
977 end = TARGET_PAGE_ALIGN(start + length);
978 start &= TARGET_PAGE_MASK;
980 rcu_read_lock();
981 block = qemu_get_ram_block(start);
982 assert(block == qemu_get_ram_block(end - 1));
983 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
984 CPU_FOREACH(cpu) {
985 tlb_reset_dirty(cpu, start1, length);
987 rcu_read_unlock();
990 /* Note: start and end must be within the same ram block. */
991 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
992 ram_addr_t length,
993 unsigned client)
995 DirtyMemoryBlocks *blocks;
996 unsigned long end, page;
997 bool dirty = false;
999 if (length == 0) {
1000 return false;
1003 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1004 page = start >> TARGET_PAGE_BITS;
1006 rcu_read_lock();
1008 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1010 while (page < end) {
1011 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1012 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1013 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1015 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1016 offset, num);
1017 page += num;
1020 rcu_read_unlock();
1022 if (dirty && tcg_enabled()) {
1023 tlb_reset_dirty_range_all(start, length);
1026 return dirty;
1029 /* Called from RCU critical section */
1030 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1031 MemoryRegionSection *section,
1032 target_ulong vaddr,
1033 hwaddr paddr, hwaddr xlat,
1034 int prot,
1035 target_ulong *address)
1037 hwaddr iotlb;
1038 CPUWatchpoint *wp;
1040 if (memory_region_is_ram(section->mr)) {
1041 /* Normal RAM. */
1042 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1043 if (!section->readonly) {
1044 iotlb |= PHYS_SECTION_NOTDIRTY;
1045 } else {
1046 iotlb |= PHYS_SECTION_ROM;
1048 } else {
1049 AddressSpaceDispatch *d;
1051 d = atomic_rcu_read(&section->address_space->dispatch);
1052 iotlb = section - d->map.sections;
1053 iotlb += xlat;
1056 /* Make accesses to pages with watchpoints go via the
1057 watchpoint trap routines. */
1058 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1059 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1060 /* Avoid trapping reads of pages with a write breakpoint. */
1061 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1062 iotlb = PHYS_SECTION_WATCH + paddr;
1063 *address |= TLB_MMIO;
1064 break;
1069 return iotlb;
1071 #endif /* defined(CONFIG_USER_ONLY) */
1073 #if !defined(CONFIG_USER_ONLY)
1075 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1076 uint16_t section);
1077 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1079 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1080 qemu_anon_ram_alloc;
1083 * Set a custom physical guest memory alloator.
1084 * Accelerators with unusual needs may need this. Hopefully, we can
1085 * get rid of it eventually.
1087 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1089 phys_mem_alloc = alloc;
1092 static uint16_t phys_section_add(PhysPageMap *map,
1093 MemoryRegionSection *section)
1095 /* The physical section number is ORed with a page-aligned
1096 * pointer to produce the iotlb entries. Thus it should
1097 * never overflow into the page-aligned value.
1099 assert(map->sections_nb < TARGET_PAGE_SIZE);
1101 if (map->sections_nb == map->sections_nb_alloc) {
1102 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1103 map->sections = g_renew(MemoryRegionSection, map->sections,
1104 map->sections_nb_alloc);
1106 map->sections[map->sections_nb] = *section;
1107 memory_region_ref(section->mr);
1108 return map->sections_nb++;
1111 static void phys_section_destroy(MemoryRegion *mr)
1113 bool have_sub_page = mr->subpage;
1115 memory_region_unref(mr);
1117 if (have_sub_page) {
1118 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1119 object_unref(OBJECT(&subpage->iomem));
1120 g_free(subpage);
1124 static void phys_sections_free(PhysPageMap *map)
1126 while (map->sections_nb > 0) {
1127 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1128 phys_section_destroy(section->mr);
1130 g_free(map->sections);
1131 g_free(map->nodes);
1134 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1136 subpage_t *subpage;
1137 hwaddr base = section->offset_within_address_space
1138 & TARGET_PAGE_MASK;
1139 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1140 d->map.nodes, d->map.sections);
1141 MemoryRegionSection subsection = {
1142 .offset_within_address_space = base,
1143 .size = int128_make64(TARGET_PAGE_SIZE),
1145 hwaddr start, end;
1147 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1149 if (!(existing->mr->subpage)) {
1150 subpage = subpage_init(d->as, base);
1151 subsection.address_space = d->as;
1152 subsection.mr = &subpage->iomem;
1153 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1154 phys_section_add(&d->map, &subsection));
1155 } else {
1156 subpage = container_of(existing->mr, subpage_t, iomem);
1158 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1159 end = start + int128_get64(section->size) - 1;
1160 subpage_register(subpage, start, end,
1161 phys_section_add(&d->map, section));
1165 static void register_multipage(AddressSpaceDispatch *d,
1166 MemoryRegionSection *section)
1168 hwaddr start_addr = section->offset_within_address_space;
1169 uint16_t section_index = phys_section_add(&d->map, section);
1170 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1171 TARGET_PAGE_BITS));
1173 assert(num_pages);
1174 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1177 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1179 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1180 AddressSpaceDispatch *d = as->next_dispatch;
1181 MemoryRegionSection now = *section, remain = *section;
1182 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1184 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1185 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1186 - now.offset_within_address_space;
1188 now.size = int128_min(int128_make64(left), now.size);
1189 register_subpage(d, &now);
1190 } else {
1191 now.size = int128_zero();
1193 while (int128_ne(remain.size, now.size)) {
1194 remain.size = int128_sub(remain.size, now.size);
1195 remain.offset_within_address_space += int128_get64(now.size);
1196 remain.offset_within_region += int128_get64(now.size);
1197 now = remain;
1198 if (int128_lt(remain.size, page_size)) {
1199 register_subpage(d, &now);
1200 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1201 now.size = page_size;
1202 register_subpage(d, &now);
1203 } else {
1204 now.size = int128_and(now.size, int128_neg(page_size));
1205 register_multipage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1216 void qemu_mutex_lock_ramlist(void)
1218 qemu_mutex_lock(&ram_list.mutex);
1221 void qemu_mutex_unlock_ramlist(void)
1223 qemu_mutex_unlock(&ram_list.mutex);
1226 #ifdef __linux__
1227 static void *file_ram_alloc(RAMBlock *block,
1228 ram_addr_t memory,
1229 const char *path,
1230 Error **errp)
1232 bool unlink_on_error = false;
1233 char *filename;
1234 char *sanitized_name;
1235 char *c;
1236 void *area = MAP_FAILED;
1237 int fd = -1;
1238 int64_t page_size;
1240 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1241 error_setg(errp,
1242 "host lacks kvm mmu notifiers, -mem-path unsupported");
1243 return NULL;
1246 for (;;) {
1247 fd = open(path, O_RDWR);
1248 if (fd >= 0) {
1249 /* @path names an existing file, use it */
1250 break;
1252 if (errno == ENOENT) {
1253 /* @path names a file that doesn't exist, create it */
1254 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1255 if (fd >= 0) {
1256 unlink_on_error = true;
1257 break;
1259 } else if (errno == EISDIR) {
1260 /* @path names a directory, create a file there */
1261 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1262 sanitized_name = g_strdup(memory_region_name(block->mr));
1263 for (c = sanitized_name; *c != '\0'; c++) {
1264 if (*c == '/') {
1265 *c = '_';
1269 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1270 sanitized_name);
1271 g_free(sanitized_name);
1273 fd = mkstemp(filename);
1274 if (fd >= 0) {
1275 unlink(filename);
1276 g_free(filename);
1277 break;
1279 g_free(filename);
1281 if (errno != EEXIST && errno != EINTR) {
1282 error_setg_errno(errp, errno,
1283 "can't open backing store %s for guest RAM",
1284 path);
1285 goto error;
1288 * Try again on EINTR and EEXIST. The latter happens when
1289 * something else creates the file between our two open().
1293 page_size = qemu_fd_getpagesize(fd);
1294 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1296 if (memory < page_size) {
1297 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1298 "or larger than page size 0x%" PRIx64,
1299 memory, page_size);
1300 goto error;
1303 memory = ROUND_UP(memory, page_size);
1306 * ftruncate is not supported by hugetlbfs in older
1307 * hosts, so don't bother bailing out on errors.
1308 * If anything goes wrong with it under other filesystems,
1309 * mmap will fail.
1311 if (ftruncate(fd, memory)) {
1312 perror("ftruncate");
1315 area = qemu_ram_mmap(fd, memory, block->mr->align,
1316 block->flags & RAM_SHARED);
1317 if (area == MAP_FAILED) {
1318 error_setg_errno(errp, errno,
1319 "unable to map backing store for guest RAM");
1320 goto error;
1323 if (mem_prealloc) {
1324 os_mem_prealloc(fd, area, memory, errp);
1325 if (errp && *errp) {
1326 goto error;
1330 block->fd = fd;
1331 return area;
1333 error:
1334 if (area != MAP_FAILED) {
1335 qemu_ram_munmap(area, memory);
1337 if (unlink_on_error) {
1338 unlink(path);
1340 if (fd != -1) {
1341 close(fd);
1343 return NULL;
1345 #endif
1347 /* Called with the ramlist lock held. */
1348 static ram_addr_t find_ram_offset(ram_addr_t size)
1350 RAMBlock *block, *next_block;
1351 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1353 assert(size != 0); /* it would hand out same offset multiple times */
1355 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1356 return 0;
1359 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1360 ram_addr_t end, next = RAM_ADDR_MAX;
1362 end = block->offset + block->max_length;
1364 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1365 if (next_block->offset >= end) {
1366 next = MIN(next, next_block->offset);
1369 if (next - end >= size && next - end < mingap) {
1370 offset = end;
1371 mingap = next - end;
1375 if (offset == RAM_ADDR_MAX) {
1376 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1377 (uint64_t)size);
1378 abort();
1381 return offset;
1384 ram_addr_t last_ram_offset(void)
1386 RAMBlock *block;
1387 ram_addr_t last = 0;
1389 rcu_read_lock();
1390 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1391 last = MAX(last, block->offset + block->max_length);
1393 rcu_read_unlock();
1394 return last;
1397 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1399 int ret;
1401 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1402 if (!machine_dump_guest_core(current_machine)) {
1403 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1404 if (ret) {
1405 perror("qemu_madvise");
1406 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1407 "but dump_guest_core=off specified\n");
1412 const char *qemu_ram_get_idstr(RAMBlock *rb)
1414 return rb->idstr;
1417 /* Called with iothread lock held. */
1418 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1420 RAMBlock *block;
1422 assert(new_block);
1423 assert(!new_block->idstr[0]);
1425 if (dev) {
1426 char *id = qdev_get_dev_path(dev);
1427 if (id) {
1428 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1429 g_free(id);
1432 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1434 rcu_read_lock();
1435 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1436 if (block != new_block &&
1437 !strcmp(block->idstr, new_block->idstr)) {
1438 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1439 new_block->idstr);
1440 abort();
1443 rcu_read_unlock();
1446 /* Called with iothread lock held. */
1447 void qemu_ram_unset_idstr(RAMBlock *block)
1449 /* FIXME: arch_init.c assumes that this is not called throughout
1450 * migration. Ignore the problem since hot-unplug during migration
1451 * does not work anyway.
1453 if (block) {
1454 memset(block->idstr, 0, sizeof(block->idstr));
1458 static int memory_try_enable_merging(void *addr, size_t len)
1460 if (!machine_mem_merge(current_machine)) {
1461 /* disabled by the user */
1462 return 0;
1465 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1468 /* Only legal before guest might have detected the memory size: e.g. on
1469 * incoming migration, or right after reset.
1471 * As memory core doesn't know how is memory accessed, it is up to
1472 * resize callback to update device state and/or add assertions to detect
1473 * misuse, if necessary.
1475 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1477 assert(block);
1479 newsize = HOST_PAGE_ALIGN(newsize);
1481 if (block->used_length == newsize) {
1482 return 0;
1485 if (!(block->flags & RAM_RESIZEABLE)) {
1486 error_setg_errno(errp, EINVAL,
1487 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1488 " in != 0x" RAM_ADDR_FMT, block->idstr,
1489 newsize, block->used_length);
1490 return -EINVAL;
1493 if (block->max_length < newsize) {
1494 error_setg_errno(errp, EINVAL,
1495 "Length too large: %s: 0x" RAM_ADDR_FMT
1496 " > 0x" RAM_ADDR_FMT, block->idstr,
1497 newsize, block->max_length);
1498 return -EINVAL;
1501 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1502 block->used_length = newsize;
1503 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1504 DIRTY_CLIENTS_ALL);
1505 memory_region_set_size(block->mr, newsize);
1506 if (block->resized) {
1507 block->resized(block->idstr, newsize, block->host);
1509 return 0;
1512 /* Called with ram_list.mutex held */
1513 static void dirty_memory_extend(ram_addr_t old_ram_size,
1514 ram_addr_t new_ram_size)
1516 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1517 DIRTY_MEMORY_BLOCK_SIZE);
1518 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1519 DIRTY_MEMORY_BLOCK_SIZE);
1520 int i;
1522 /* Only need to extend if block count increased */
1523 if (new_num_blocks <= old_num_blocks) {
1524 return;
1527 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1528 DirtyMemoryBlocks *old_blocks;
1529 DirtyMemoryBlocks *new_blocks;
1530 int j;
1532 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1533 new_blocks = g_malloc(sizeof(*new_blocks) +
1534 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1536 if (old_num_blocks) {
1537 memcpy(new_blocks->blocks, old_blocks->blocks,
1538 old_num_blocks * sizeof(old_blocks->blocks[0]));
1541 for (j = old_num_blocks; j < new_num_blocks; j++) {
1542 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1545 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1547 if (old_blocks) {
1548 g_free_rcu(old_blocks, rcu);
1553 static void ram_block_add(RAMBlock *new_block, Error **errp)
1555 RAMBlock *block;
1556 RAMBlock *last_block = NULL;
1557 ram_addr_t old_ram_size, new_ram_size;
1558 Error *err = NULL;
1560 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1562 qemu_mutex_lock_ramlist();
1563 new_block->offset = find_ram_offset(new_block->max_length);
1565 if (!new_block->host) {
1566 if (xen_enabled()) {
1567 xen_ram_alloc(new_block->offset, new_block->max_length,
1568 new_block->mr, &err);
1569 if (err) {
1570 error_propagate(errp, err);
1571 qemu_mutex_unlock_ramlist();
1572 return;
1574 } else {
1575 new_block->host = phys_mem_alloc(new_block->max_length,
1576 &new_block->mr->align);
1577 if (!new_block->host) {
1578 error_setg_errno(errp, errno,
1579 "cannot set up guest memory '%s'",
1580 memory_region_name(new_block->mr));
1581 qemu_mutex_unlock_ramlist();
1582 return;
1584 memory_try_enable_merging(new_block->host, new_block->max_length);
1588 new_ram_size = MAX(old_ram_size,
1589 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1590 if (new_ram_size > old_ram_size) {
1591 migration_bitmap_extend(old_ram_size, new_ram_size);
1592 dirty_memory_extend(old_ram_size, new_ram_size);
1594 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1595 * QLIST (which has an RCU-friendly variant) does not have insertion at
1596 * tail, so save the last element in last_block.
1598 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1599 last_block = block;
1600 if (block->max_length < new_block->max_length) {
1601 break;
1604 if (block) {
1605 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1606 } else if (last_block) {
1607 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1608 } else { /* list is empty */
1609 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1611 ram_list.mru_block = NULL;
1613 /* Write list before version */
1614 smp_wmb();
1615 ram_list.version++;
1616 qemu_mutex_unlock_ramlist();
1618 cpu_physical_memory_set_dirty_range(new_block->offset,
1619 new_block->used_length,
1620 DIRTY_CLIENTS_ALL);
1622 if (new_block->host) {
1623 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1624 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1625 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1626 if (kvm_enabled()) {
1627 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1632 #ifdef __linux__
1633 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1634 bool share, const char *mem_path,
1635 Error **errp)
1637 RAMBlock *new_block;
1638 Error *local_err = NULL;
1640 if (xen_enabled()) {
1641 error_setg(errp, "-mem-path not supported with Xen");
1642 return NULL;
1645 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1647 * file_ram_alloc() needs to allocate just like
1648 * phys_mem_alloc, but we haven't bothered to provide
1649 * a hook there.
1651 error_setg(errp,
1652 "-mem-path not supported with this accelerator");
1653 return NULL;
1656 size = HOST_PAGE_ALIGN(size);
1657 new_block = g_malloc0(sizeof(*new_block));
1658 new_block->mr = mr;
1659 new_block->used_length = size;
1660 new_block->max_length = size;
1661 new_block->flags = share ? RAM_SHARED : 0;
1662 new_block->host = file_ram_alloc(new_block, size,
1663 mem_path, errp);
1664 if (!new_block->host) {
1665 g_free(new_block);
1666 return NULL;
1669 ram_block_add(new_block, &local_err);
1670 if (local_err) {
1671 g_free(new_block);
1672 error_propagate(errp, local_err);
1673 return NULL;
1675 return new_block;
1677 #endif
1679 static
1680 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1681 void (*resized)(const char*,
1682 uint64_t length,
1683 void *host),
1684 void *host, bool resizeable,
1685 MemoryRegion *mr, Error **errp)
1687 RAMBlock *new_block;
1688 Error *local_err = NULL;
1690 size = HOST_PAGE_ALIGN(size);
1691 max_size = HOST_PAGE_ALIGN(max_size);
1692 new_block = g_malloc0(sizeof(*new_block));
1693 new_block->mr = mr;
1694 new_block->resized = resized;
1695 new_block->used_length = size;
1696 new_block->max_length = max_size;
1697 assert(max_size >= size);
1698 new_block->fd = -1;
1699 new_block->host = host;
1700 if (host) {
1701 new_block->flags |= RAM_PREALLOC;
1703 if (resizeable) {
1704 new_block->flags |= RAM_RESIZEABLE;
1706 ram_block_add(new_block, &local_err);
1707 if (local_err) {
1708 g_free(new_block);
1709 error_propagate(errp, local_err);
1710 return NULL;
1712 return new_block;
1715 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1716 MemoryRegion *mr, Error **errp)
1718 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1721 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1723 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1726 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1727 void (*resized)(const char*,
1728 uint64_t length,
1729 void *host),
1730 MemoryRegion *mr, Error **errp)
1732 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1735 static void reclaim_ramblock(RAMBlock *block)
1737 if (block->flags & RAM_PREALLOC) {
1739 } else if (xen_enabled()) {
1740 xen_invalidate_map_cache_entry(block->host);
1741 #ifndef _WIN32
1742 } else if (block->fd >= 0) {
1743 qemu_ram_munmap(block->host, block->max_length);
1744 close(block->fd);
1745 #endif
1746 } else {
1747 qemu_anon_ram_free(block->host, block->max_length);
1749 g_free(block);
1752 void qemu_ram_free(RAMBlock *block)
1754 if (!block) {
1755 return;
1758 qemu_mutex_lock_ramlist();
1759 QLIST_REMOVE_RCU(block, next);
1760 ram_list.mru_block = NULL;
1761 /* Write list before version */
1762 smp_wmb();
1763 ram_list.version++;
1764 call_rcu(block, reclaim_ramblock, rcu);
1765 qemu_mutex_unlock_ramlist();
1768 #ifndef _WIN32
1769 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1771 RAMBlock *block;
1772 ram_addr_t offset;
1773 int flags;
1774 void *area, *vaddr;
1776 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1777 offset = addr - block->offset;
1778 if (offset < block->max_length) {
1779 vaddr = ramblock_ptr(block, offset);
1780 if (block->flags & RAM_PREALLOC) {
1782 } else if (xen_enabled()) {
1783 abort();
1784 } else {
1785 flags = MAP_FIXED;
1786 if (block->fd >= 0) {
1787 flags |= (block->flags & RAM_SHARED ?
1788 MAP_SHARED : MAP_PRIVATE);
1789 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1790 flags, block->fd, offset);
1791 } else {
1793 * Remap needs to match alloc. Accelerators that
1794 * set phys_mem_alloc never remap. If they did,
1795 * we'd need a remap hook here.
1797 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1799 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1800 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1801 flags, -1, 0);
1803 if (area != vaddr) {
1804 fprintf(stderr, "Could not remap addr: "
1805 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1806 length, addr);
1807 exit(1);
1809 memory_try_enable_merging(vaddr, length);
1810 qemu_ram_setup_dump(vaddr, length);
1815 #endif /* !_WIN32 */
1817 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1818 * This should not be used for general purpose DMA. Use address_space_map
1819 * or address_space_rw instead. For local memory (e.g. video ram) that the
1820 * device owns, use memory_region_get_ram_ptr.
1822 * Called within RCU critical section.
1824 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1826 RAMBlock *block = ram_block;
1828 if (block == NULL) {
1829 block = qemu_get_ram_block(addr);
1830 addr -= block->offset;
1833 if (xen_enabled() && block->host == NULL) {
1834 /* We need to check if the requested address is in the RAM
1835 * because we don't want to map the entire memory in QEMU.
1836 * In that case just map until the end of the page.
1838 if (block->offset == 0) {
1839 return xen_map_cache(addr, 0, 0);
1842 block->host = xen_map_cache(block->offset, block->max_length, 1);
1844 return ramblock_ptr(block, addr);
1847 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1848 * but takes a size argument.
1850 * Called within RCU critical section.
1852 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1853 hwaddr *size)
1855 RAMBlock *block = ram_block;
1856 if (*size == 0) {
1857 return NULL;
1860 if (block == NULL) {
1861 block = qemu_get_ram_block(addr);
1862 addr -= block->offset;
1864 *size = MIN(*size, block->max_length - addr);
1866 if (xen_enabled() && block->host == NULL) {
1867 /* We need to check if the requested address is in the RAM
1868 * because we don't want to map the entire memory in QEMU.
1869 * In that case just map the requested area.
1871 if (block->offset == 0) {
1872 return xen_map_cache(addr, *size, 1);
1875 block->host = xen_map_cache(block->offset, block->max_length, 1);
1878 return ramblock_ptr(block, addr);
1882 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1883 * in that RAMBlock.
1885 * ptr: Host pointer to look up
1886 * round_offset: If true round the result offset down to a page boundary
1887 * *ram_addr: set to result ram_addr
1888 * *offset: set to result offset within the RAMBlock
1890 * Returns: RAMBlock (or NULL if not found)
1892 * By the time this function returns, the returned pointer is not protected
1893 * by RCU anymore. If the caller is not within an RCU critical section and
1894 * does not hold the iothread lock, it must have other means of protecting the
1895 * pointer, such as a reference to the region that includes the incoming
1896 * ram_addr_t.
1898 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1899 ram_addr_t *offset)
1901 RAMBlock *block;
1902 uint8_t *host = ptr;
1904 if (xen_enabled()) {
1905 ram_addr_t ram_addr;
1906 rcu_read_lock();
1907 ram_addr = xen_ram_addr_from_mapcache(ptr);
1908 block = qemu_get_ram_block(ram_addr);
1909 if (block) {
1910 *offset = ram_addr - block->offset;
1912 rcu_read_unlock();
1913 return block;
1916 rcu_read_lock();
1917 block = atomic_rcu_read(&ram_list.mru_block);
1918 if (block && block->host && host - block->host < block->max_length) {
1919 goto found;
1922 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1923 /* This case append when the block is not mapped. */
1924 if (block->host == NULL) {
1925 continue;
1927 if (host - block->host < block->max_length) {
1928 goto found;
1932 rcu_read_unlock();
1933 return NULL;
1935 found:
1936 *offset = (host - block->host);
1937 if (round_offset) {
1938 *offset &= TARGET_PAGE_MASK;
1940 rcu_read_unlock();
1941 return block;
1945 * Finds the named RAMBlock
1947 * name: The name of RAMBlock to find
1949 * Returns: RAMBlock (or NULL if not found)
1951 RAMBlock *qemu_ram_block_by_name(const char *name)
1953 RAMBlock *block;
1955 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1956 if (!strcmp(name, block->idstr)) {
1957 return block;
1961 return NULL;
1964 /* Some of the softmmu routines need to translate from a host pointer
1965 (typically a TLB entry) back to a ram offset. */
1966 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1968 RAMBlock *block;
1969 ram_addr_t offset;
1971 block = qemu_ram_block_from_host(ptr, false, &offset);
1972 if (!block) {
1973 return RAM_ADDR_INVALID;
1976 return block->offset + offset;
1979 /* Called within RCU critical section. */
1980 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1981 uint64_t val, unsigned size)
1983 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1984 tb_invalidate_phys_page_fast(ram_addr, size);
1986 switch (size) {
1987 case 1:
1988 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1989 break;
1990 case 2:
1991 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1992 break;
1993 case 4:
1994 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1995 break;
1996 default:
1997 abort();
1999 /* Set both VGA and migration bits for simplicity and to remove
2000 * the notdirty callback faster.
2002 cpu_physical_memory_set_dirty_range(ram_addr, size,
2003 DIRTY_CLIENTS_NOCODE);
2004 /* we remove the notdirty callback only if the code has been
2005 flushed */
2006 if (!cpu_physical_memory_is_clean(ram_addr)) {
2007 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2011 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2012 unsigned size, bool is_write)
2014 return is_write;
2017 static const MemoryRegionOps notdirty_mem_ops = {
2018 .write = notdirty_mem_write,
2019 .valid.accepts = notdirty_mem_accepts,
2020 .endianness = DEVICE_NATIVE_ENDIAN,
2023 /* Generate a debug exception if a watchpoint has been hit. */
2024 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2026 CPUState *cpu = current_cpu;
2027 CPUClass *cc = CPU_GET_CLASS(cpu);
2028 CPUArchState *env = cpu->env_ptr;
2029 target_ulong pc, cs_base;
2030 target_ulong vaddr;
2031 CPUWatchpoint *wp;
2032 uint32_t cpu_flags;
2034 if (cpu->watchpoint_hit) {
2035 /* We re-entered the check after replacing the TB. Now raise
2036 * the debug interrupt so that is will trigger after the
2037 * current instruction. */
2038 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2039 return;
2041 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2042 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2043 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2044 && (wp->flags & flags)) {
2045 if (flags == BP_MEM_READ) {
2046 wp->flags |= BP_WATCHPOINT_HIT_READ;
2047 } else {
2048 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2050 wp->hitaddr = vaddr;
2051 wp->hitattrs = attrs;
2052 if (!cpu->watchpoint_hit) {
2053 if (wp->flags & BP_CPU &&
2054 !cc->debug_check_watchpoint(cpu, wp)) {
2055 wp->flags &= ~BP_WATCHPOINT_HIT;
2056 continue;
2058 cpu->watchpoint_hit = wp;
2059 tb_check_watchpoint(cpu);
2060 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2061 cpu->exception_index = EXCP_DEBUG;
2062 cpu_loop_exit(cpu);
2063 } else {
2064 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2065 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2066 cpu_loop_exit_noexc(cpu);
2069 } else {
2070 wp->flags &= ~BP_WATCHPOINT_HIT;
2075 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2076 so these check for a hit then pass through to the normal out-of-line
2077 phys routines. */
2078 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2079 unsigned size, MemTxAttrs attrs)
2081 MemTxResult res;
2082 uint64_t data;
2083 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2084 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2086 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2087 switch (size) {
2088 case 1:
2089 data = address_space_ldub(as, addr, attrs, &res);
2090 break;
2091 case 2:
2092 data = address_space_lduw(as, addr, attrs, &res);
2093 break;
2094 case 4:
2095 data = address_space_ldl(as, addr, attrs, &res);
2096 break;
2097 default: abort();
2099 *pdata = data;
2100 return res;
2103 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2104 uint64_t val, unsigned size,
2105 MemTxAttrs attrs)
2107 MemTxResult res;
2108 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2109 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2111 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2112 switch (size) {
2113 case 1:
2114 address_space_stb(as, addr, val, attrs, &res);
2115 break;
2116 case 2:
2117 address_space_stw(as, addr, val, attrs, &res);
2118 break;
2119 case 4:
2120 address_space_stl(as, addr, val, attrs, &res);
2121 break;
2122 default: abort();
2124 return res;
2127 static const MemoryRegionOps watch_mem_ops = {
2128 .read_with_attrs = watch_mem_read,
2129 .write_with_attrs = watch_mem_write,
2130 .endianness = DEVICE_NATIVE_ENDIAN,
2133 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2134 unsigned len, MemTxAttrs attrs)
2136 subpage_t *subpage = opaque;
2137 uint8_t buf[8];
2138 MemTxResult res;
2140 #if defined(DEBUG_SUBPAGE)
2141 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2142 subpage, len, addr);
2143 #endif
2144 res = address_space_read(subpage->as, addr + subpage->base,
2145 attrs, buf, len);
2146 if (res) {
2147 return res;
2149 switch (len) {
2150 case 1:
2151 *data = ldub_p(buf);
2152 return MEMTX_OK;
2153 case 2:
2154 *data = lduw_p(buf);
2155 return MEMTX_OK;
2156 case 4:
2157 *data = ldl_p(buf);
2158 return MEMTX_OK;
2159 case 8:
2160 *data = ldq_p(buf);
2161 return MEMTX_OK;
2162 default:
2163 abort();
2167 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2168 uint64_t value, unsigned len, MemTxAttrs attrs)
2170 subpage_t *subpage = opaque;
2171 uint8_t buf[8];
2173 #if defined(DEBUG_SUBPAGE)
2174 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2175 " value %"PRIx64"\n",
2176 __func__, subpage, len, addr, value);
2177 #endif
2178 switch (len) {
2179 case 1:
2180 stb_p(buf, value);
2181 break;
2182 case 2:
2183 stw_p(buf, value);
2184 break;
2185 case 4:
2186 stl_p(buf, value);
2187 break;
2188 case 8:
2189 stq_p(buf, value);
2190 break;
2191 default:
2192 abort();
2194 return address_space_write(subpage->as, addr + subpage->base,
2195 attrs, buf, len);
2198 static bool subpage_accepts(void *opaque, hwaddr addr,
2199 unsigned len, bool is_write)
2201 subpage_t *subpage = opaque;
2202 #if defined(DEBUG_SUBPAGE)
2203 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2204 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2205 #endif
2207 return address_space_access_valid(subpage->as, addr + subpage->base,
2208 len, is_write);
2211 static const MemoryRegionOps subpage_ops = {
2212 .read_with_attrs = subpage_read,
2213 .write_with_attrs = subpage_write,
2214 .impl.min_access_size = 1,
2215 .impl.max_access_size = 8,
2216 .valid.min_access_size = 1,
2217 .valid.max_access_size = 8,
2218 .valid.accepts = subpage_accepts,
2219 .endianness = DEVICE_NATIVE_ENDIAN,
2222 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2223 uint16_t section)
2225 int idx, eidx;
2227 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2228 return -1;
2229 idx = SUBPAGE_IDX(start);
2230 eidx = SUBPAGE_IDX(end);
2231 #if defined(DEBUG_SUBPAGE)
2232 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2233 __func__, mmio, start, end, idx, eidx, section);
2234 #endif
2235 for (; idx <= eidx; idx++) {
2236 mmio->sub_section[idx] = section;
2239 return 0;
2242 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2244 subpage_t *mmio;
2246 mmio = g_malloc0(sizeof(subpage_t));
2248 mmio->as = as;
2249 mmio->base = base;
2250 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2251 NULL, TARGET_PAGE_SIZE);
2252 mmio->iomem.subpage = true;
2253 #if defined(DEBUG_SUBPAGE)
2254 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2255 mmio, base, TARGET_PAGE_SIZE);
2256 #endif
2257 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2259 return mmio;
2262 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2263 MemoryRegion *mr)
2265 assert(as);
2266 MemoryRegionSection section = {
2267 .address_space = as,
2268 .mr = mr,
2269 .offset_within_address_space = 0,
2270 .offset_within_region = 0,
2271 .size = int128_2_64(),
2274 return phys_section_add(map, &section);
2277 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2279 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2280 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2281 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2282 MemoryRegionSection *sections = d->map.sections;
2284 return sections[index & ~TARGET_PAGE_MASK].mr;
2287 static void io_mem_init(void)
2289 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2290 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2291 NULL, UINT64_MAX);
2292 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2293 NULL, UINT64_MAX);
2294 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2295 NULL, UINT64_MAX);
2298 static void mem_begin(MemoryListener *listener)
2300 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2301 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2302 uint16_t n;
2304 n = dummy_section(&d->map, as, &io_mem_unassigned);
2305 assert(n == PHYS_SECTION_UNASSIGNED);
2306 n = dummy_section(&d->map, as, &io_mem_notdirty);
2307 assert(n == PHYS_SECTION_NOTDIRTY);
2308 n = dummy_section(&d->map, as, &io_mem_rom);
2309 assert(n == PHYS_SECTION_ROM);
2310 n = dummy_section(&d->map, as, &io_mem_watch);
2311 assert(n == PHYS_SECTION_WATCH);
2313 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2314 d->as = as;
2315 as->next_dispatch = d;
2318 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2320 phys_sections_free(&d->map);
2321 g_free(d);
2324 static void mem_commit(MemoryListener *listener)
2326 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2327 AddressSpaceDispatch *cur = as->dispatch;
2328 AddressSpaceDispatch *next = as->next_dispatch;
2330 phys_page_compact_all(next, next->map.nodes_nb);
2332 atomic_rcu_set(&as->dispatch, next);
2333 if (cur) {
2334 call_rcu(cur, address_space_dispatch_free, rcu);
2338 static void tcg_commit(MemoryListener *listener)
2340 CPUAddressSpace *cpuas;
2341 AddressSpaceDispatch *d;
2343 /* since each CPU stores ram addresses in its TLB cache, we must
2344 reset the modified entries */
2345 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2346 cpu_reloading_memory_map();
2347 /* The CPU and TLB are protected by the iothread lock.
2348 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2349 * may have split the RCU critical section.
2351 d = atomic_rcu_read(&cpuas->as->dispatch);
2352 cpuas->memory_dispatch = d;
2353 tlb_flush(cpuas->cpu, 1);
2356 void address_space_init_dispatch(AddressSpace *as)
2358 as->dispatch = NULL;
2359 as->dispatch_listener = (MemoryListener) {
2360 .begin = mem_begin,
2361 .commit = mem_commit,
2362 .region_add = mem_add,
2363 .region_nop = mem_add,
2364 .priority = 0,
2366 memory_listener_register(&as->dispatch_listener, as);
2369 void address_space_unregister(AddressSpace *as)
2371 memory_listener_unregister(&as->dispatch_listener);
2374 void address_space_destroy_dispatch(AddressSpace *as)
2376 AddressSpaceDispatch *d = as->dispatch;
2378 atomic_rcu_set(&as->dispatch, NULL);
2379 if (d) {
2380 call_rcu(d, address_space_dispatch_free, rcu);
2384 static void memory_map_init(void)
2386 system_memory = g_malloc(sizeof(*system_memory));
2388 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2389 address_space_init(&address_space_memory, system_memory, "memory");
2391 system_io = g_malloc(sizeof(*system_io));
2392 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2393 65536);
2394 address_space_init(&address_space_io, system_io, "I/O");
2397 MemoryRegion *get_system_memory(void)
2399 return system_memory;
2402 MemoryRegion *get_system_io(void)
2404 return system_io;
2407 #endif /* !defined(CONFIG_USER_ONLY) */
2409 /* physical memory access (slow version, mainly for debug) */
2410 #if defined(CONFIG_USER_ONLY)
2411 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2412 uint8_t *buf, int len, int is_write)
2414 int l, flags;
2415 target_ulong page;
2416 void * p;
2418 while (len > 0) {
2419 page = addr & TARGET_PAGE_MASK;
2420 l = (page + TARGET_PAGE_SIZE) - addr;
2421 if (l > len)
2422 l = len;
2423 flags = page_get_flags(page);
2424 if (!(flags & PAGE_VALID))
2425 return -1;
2426 if (is_write) {
2427 if (!(flags & PAGE_WRITE))
2428 return -1;
2429 /* XXX: this code should not depend on lock_user */
2430 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2431 return -1;
2432 memcpy(p, buf, l);
2433 unlock_user(p, addr, l);
2434 } else {
2435 if (!(flags & PAGE_READ))
2436 return -1;
2437 /* XXX: this code should not depend on lock_user */
2438 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2439 return -1;
2440 memcpy(buf, p, l);
2441 unlock_user(p, addr, 0);
2443 len -= l;
2444 buf += l;
2445 addr += l;
2447 return 0;
2450 #else
2452 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2453 hwaddr length)
2455 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2456 addr += memory_region_get_ram_addr(mr);
2458 /* No early return if dirty_log_mask is or becomes 0, because
2459 * cpu_physical_memory_set_dirty_range will still call
2460 * xen_modified_memory.
2462 if (dirty_log_mask) {
2463 dirty_log_mask =
2464 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2466 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2467 tb_invalidate_phys_range(addr, addr + length);
2468 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2470 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2473 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2475 unsigned access_size_max = mr->ops->valid.max_access_size;
2477 /* Regions are assumed to support 1-4 byte accesses unless
2478 otherwise specified. */
2479 if (access_size_max == 0) {
2480 access_size_max = 4;
2483 /* Bound the maximum access by the alignment of the address. */
2484 if (!mr->ops->impl.unaligned) {
2485 unsigned align_size_max = addr & -addr;
2486 if (align_size_max != 0 && align_size_max < access_size_max) {
2487 access_size_max = align_size_max;
2491 /* Don't attempt accesses larger than the maximum. */
2492 if (l > access_size_max) {
2493 l = access_size_max;
2495 l = pow2floor(l);
2497 return l;
2500 static bool prepare_mmio_access(MemoryRegion *mr)
2502 bool unlocked = !qemu_mutex_iothread_locked();
2503 bool release_lock = false;
2505 if (unlocked && mr->global_locking) {
2506 qemu_mutex_lock_iothread();
2507 unlocked = false;
2508 release_lock = true;
2510 if (mr->flush_coalesced_mmio) {
2511 if (unlocked) {
2512 qemu_mutex_lock_iothread();
2514 qemu_flush_coalesced_mmio_buffer();
2515 if (unlocked) {
2516 qemu_mutex_unlock_iothread();
2520 return release_lock;
2523 /* Called within RCU critical section. */
2524 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2525 MemTxAttrs attrs,
2526 const uint8_t *buf,
2527 int len, hwaddr addr1,
2528 hwaddr l, MemoryRegion *mr)
2530 uint8_t *ptr;
2531 uint64_t val;
2532 MemTxResult result = MEMTX_OK;
2533 bool release_lock = false;
2535 for (;;) {
2536 if (!memory_access_is_direct(mr, true)) {
2537 release_lock |= prepare_mmio_access(mr);
2538 l = memory_access_size(mr, l, addr1);
2539 /* XXX: could force current_cpu to NULL to avoid
2540 potential bugs */
2541 switch (l) {
2542 case 8:
2543 /* 64 bit write access */
2544 val = ldq_p(buf);
2545 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2546 attrs);
2547 break;
2548 case 4:
2549 /* 32 bit write access */
2550 val = ldl_p(buf);
2551 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2552 attrs);
2553 break;
2554 case 2:
2555 /* 16 bit write access */
2556 val = lduw_p(buf);
2557 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2558 attrs);
2559 break;
2560 case 1:
2561 /* 8 bit write access */
2562 val = ldub_p(buf);
2563 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2564 attrs);
2565 break;
2566 default:
2567 abort();
2569 } else {
2570 /* RAM case */
2571 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2572 memcpy(ptr, buf, l);
2573 invalidate_and_set_dirty(mr, addr1, l);
2576 if (release_lock) {
2577 qemu_mutex_unlock_iothread();
2578 release_lock = false;
2581 len -= l;
2582 buf += l;
2583 addr += l;
2585 if (!len) {
2586 break;
2589 l = len;
2590 mr = address_space_translate(as, addr, &addr1, &l, true);
2593 return result;
2596 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2597 const uint8_t *buf, int len)
2599 hwaddr l;
2600 hwaddr addr1;
2601 MemoryRegion *mr;
2602 MemTxResult result = MEMTX_OK;
2604 if (len > 0) {
2605 rcu_read_lock();
2606 l = len;
2607 mr = address_space_translate(as, addr, &addr1, &l, true);
2608 result = address_space_write_continue(as, addr, attrs, buf, len,
2609 addr1, l, mr);
2610 rcu_read_unlock();
2613 return result;
2616 /* Called within RCU critical section. */
2617 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2618 MemTxAttrs attrs, uint8_t *buf,
2619 int len, hwaddr addr1, hwaddr l,
2620 MemoryRegion *mr)
2622 uint8_t *ptr;
2623 uint64_t val;
2624 MemTxResult result = MEMTX_OK;
2625 bool release_lock = false;
2627 for (;;) {
2628 if (!memory_access_is_direct(mr, false)) {
2629 /* I/O case */
2630 release_lock |= prepare_mmio_access(mr);
2631 l = memory_access_size(mr, l, addr1);
2632 switch (l) {
2633 case 8:
2634 /* 64 bit read access */
2635 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2636 attrs);
2637 stq_p(buf, val);
2638 break;
2639 case 4:
2640 /* 32 bit read access */
2641 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2642 attrs);
2643 stl_p(buf, val);
2644 break;
2645 case 2:
2646 /* 16 bit read access */
2647 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2648 attrs);
2649 stw_p(buf, val);
2650 break;
2651 case 1:
2652 /* 8 bit read access */
2653 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2654 attrs);
2655 stb_p(buf, val);
2656 break;
2657 default:
2658 abort();
2660 } else {
2661 /* RAM case */
2662 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2663 memcpy(buf, ptr, l);
2666 if (release_lock) {
2667 qemu_mutex_unlock_iothread();
2668 release_lock = false;
2671 len -= l;
2672 buf += l;
2673 addr += l;
2675 if (!len) {
2676 break;
2679 l = len;
2680 mr = address_space_translate(as, addr, &addr1, &l, false);
2683 return result;
2686 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2687 MemTxAttrs attrs, uint8_t *buf, int len)
2689 hwaddr l;
2690 hwaddr addr1;
2691 MemoryRegion *mr;
2692 MemTxResult result = MEMTX_OK;
2694 if (len > 0) {
2695 rcu_read_lock();
2696 l = len;
2697 mr = address_space_translate(as, addr, &addr1, &l, false);
2698 result = address_space_read_continue(as, addr, attrs, buf, len,
2699 addr1, l, mr);
2700 rcu_read_unlock();
2703 return result;
2706 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2707 uint8_t *buf, int len, bool is_write)
2709 if (is_write) {
2710 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2711 } else {
2712 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2716 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2717 int len, int is_write)
2719 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2720 buf, len, is_write);
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into the ROM/RAM range */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2728 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2729 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2731 hwaddr l;
2732 uint8_t *ptr;
2733 hwaddr addr1;
2734 MemoryRegion *mr;
2736 rcu_read_lock();
2737 while (len > 0) {
2738 l = len;
2739 mr = address_space_translate(as, addr, &addr1, &l, true);
2741 if (!(memory_region_is_ram(mr) ||
2742 memory_region_is_romd(mr))) {
2743 l = memory_access_size(mr, l, addr1);
2744 } else {
2745 /* ROM/RAM case */
2746 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2747 switch (type) {
2748 case WRITE_DATA:
2749 memcpy(ptr, buf, l);
2750 invalidate_and_set_dirty(mr, addr1, l);
2751 break;
2752 case FLUSH_CACHE:
2753 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2754 break;
2757 len -= l;
2758 buf += l;
2759 addr += l;
2761 rcu_read_unlock();
2764 /* used for ROM loading : can write in RAM and ROM */
2765 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2766 const uint8_t *buf, int len)
2768 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2771 void cpu_flush_icache_range(hwaddr start, int len)
2774 * This function should do the same thing as an icache flush that was
2775 * triggered from within the guest. For TCG we are always cache coherent,
2776 * so there is no need to flush anything. For KVM / Xen we need to flush
2777 * the host's instruction cache at least.
2779 if (tcg_enabled()) {
2780 return;
2783 cpu_physical_memory_write_rom_internal(&address_space_memory,
2784 start, NULL, len, FLUSH_CACHE);
2787 typedef struct {
2788 MemoryRegion *mr;
2789 void *buffer;
2790 hwaddr addr;
2791 hwaddr len;
2792 bool in_use;
2793 } BounceBuffer;
2795 static BounceBuffer bounce;
2797 typedef struct MapClient {
2798 QEMUBH *bh;
2799 QLIST_ENTRY(MapClient) link;
2800 } MapClient;
2802 QemuMutex map_client_list_lock;
2803 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2804 = QLIST_HEAD_INITIALIZER(map_client_list);
2806 static void cpu_unregister_map_client_do(MapClient *client)
2808 QLIST_REMOVE(client, link);
2809 g_free(client);
2812 static void cpu_notify_map_clients_locked(void)
2814 MapClient *client;
2816 while (!QLIST_EMPTY(&map_client_list)) {
2817 client = QLIST_FIRST(&map_client_list);
2818 qemu_bh_schedule(client->bh);
2819 cpu_unregister_map_client_do(client);
2823 void cpu_register_map_client(QEMUBH *bh)
2825 MapClient *client = g_malloc(sizeof(*client));
2827 qemu_mutex_lock(&map_client_list_lock);
2828 client->bh = bh;
2829 QLIST_INSERT_HEAD(&map_client_list, client, link);
2830 if (!atomic_read(&bounce.in_use)) {
2831 cpu_notify_map_clients_locked();
2833 qemu_mutex_unlock(&map_client_list_lock);
2836 void cpu_exec_init_all(void)
2838 qemu_mutex_init(&ram_list.mutex);
2839 io_mem_init();
2840 memory_map_init();
2841 qemu_mutex_init(&map_client_list_lock);
2844 void cpu_unregister_map_client(QEMUBH *bh)
2846 MapClient *client;
2848 qemu_mutex_lock(&map_client_list_lock);
2849 QLIST_FOREACH(client, &map_client_list, link) {
2850 if (client->bh == bh) {
2851 cpu_unregister_map_client_do(client);
2852 break;
2855 qemu_mutex_unlock(&map_client_list_lock);
2858 static void cpu_notify_map_clients(void)
2860 qemu_mutex_lock(&map_client_list_lock);
2861 cpu_notify_map_clients_locked();
2862 qemu_mutex_unlock(&map_client_list_lock);
2865 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2867 MemoryRegion *mr;
2868 hwaddr l, xlat;
2870 rcu_read_lock();
2871 while (len > 0) {
2872 l = len;
2873 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2874 if (!memory_access_is_direct(mr, is_write)) {
2875 l = memory_access_size(mr, l, addr);
2876 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2877 return false;
2881 len -= l;
2882 addr += l;
2884 rcu_read_unlock();
2885 return true;
2888 /* Map a physical memory region into a host virtual address.
2889 * May map a subset of the requested range, given by and returned in *plen.
2890 * May return NULL if resources needed to perform the mapping are exhausted.
2891 * Use only for reads OR writes - not for read-modify-write operations.
2892 * Use cpu_register_map_client() to know when retrying the map operation is
2893 * likely to succeed.
2895 void *address_space_map(AddressSpace *as,
2896 hwaddr addr,
2897 hwaddr *plen,
2898 bool is_write)
2900 hwaddr len = *plen;
2901 hwaddr done = 0;
2902 hwaddr l, xlat, base;
2903 MemoryRegion *mr, *this_mr;
2904 void *ptr;
2906 if (len == 0) {
2907 return NULL;
2910 l = len;
2911 rcu_read_lock();
2912 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2914 if (!memory_access_is_direct(mr, is_write)) {
2915 if (atomic_xchg(&bounce.in_use, true)) {
2916 rcu_read_unlock();
2917 return NULL;
2919 /* Avoid unbounded allocations */
2920 l = MIN(l, TARGET_PAGE_SIZE);
2921 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2922 bounce.addr = addr;
2923 bounce.len = l;
2925 memory_region_ref(mr);
2926 bounce.mr = mr;
2927 if (!is_write) {
2928 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2929 bounce.buffer, l);
2932 rcu_read_unlock();
2933 *plen = l;
2934 return bounce.buffer;
2937 base = xlat;
2939 for (;;) {
2940 len -= l;
2941 addr += l;
2942 done += l;
2943 if (len == 0) {
2944 break;
2947 l = len;
2948 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2949 if (this_mr != mr || xlat != base + done) {
2950 break;
2954 memory_region_ref(mr);
2955 *plen = done;
2956 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2957 rcu_read_unlock();
2959 return ptr;
2962 /* Unmaps a memory region previously mapped by address_space_map().
2963 * Will also mark the memory as dirty if is_write == 1. access_len gives
2964 * the amount of memory that was actually read or written by the caller.
2966 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2967 int is_write, hwaddr access_len)
2969 if (buffer != bounce.buffer) {
2970 MemoryRegion *mr;
2971 ram_addr_t addr1;
2973 mr = memory_region_from_host(buffer, &addr1);
2974 assert(mr != NULL);
2975 if (is_write) {
2976 invalidate_and_set_dirty(mr, addr1, access_len);
2978 if (xen_enabled()) {
2979 xen_invalidate_map_cache_entry(buffer);
2981 memory_region_unref(mr);
2982 return;
2984 if (is_write) {
2985 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2986 bounce.buffer, access_len);
2988 qemu_vfree(bounce.buffer);
2989 bounce.buffer = NULL;
2990 memory_region_unref(bounce.mr);
2991 atomic_mb_set(&bounce.in_use, false);
2992 cpu_notify_map_clients();
2995 void *cpu_physical_memory_map(hwaddr addr,
2996 hwaddr *plen,
2997 int is_write)
2999 return address_space_map(&address_space_memory, addr, plen, is_write);
3002 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3003 int is_write, hwaddr access_len)
3005 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3008 /* warning: addr must be aligned */
3009 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3010 MemTxAttrs attrs,
3011 MemTxResult *result,
3012 enum device_endian endian)
3014 uint8_t *ptr;
3015 uint64_t val;
3016 MemoryRegion *mr;
3017 hwaddr l = 4;
3018 hwaddr addr1;
3019 MemTxResult r;
3020 bool release_lock = false;
3022 rcu_read_lock();
3023 mr = address_space_translate(as, addr, &addr1, &l, false);
3024 if (l < 4 || !memory_access_is_direct(mr, false)) {
3025 release_lock |= prepare_mmio_access(mr);
3027 /* I/O case */
3028 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3029 #if defined(TARGET_WORDS_BIGENDIAN)
3030 if (endian == DEVICE_LITTLE_ENDIAN) {
3031 val = bswap32(val);
3033 #else
3034 if (endian == DEVICE_BIG_ENDIAN) {
3035 val = bswap32(val);
3037 #endif
3038 } else {
3039 /* RAM case */
3040 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3041 switch (endian) {
3042 case DEVICE_LITTLE_ENDIAN:
3043 val = ldl_le_p(ptr);
3044 break;
3045 case DEVICE_BIG_ENDIAN:
3046 val = ldl_be_p(ptr);
3047 break;
3048 default:
3049 val = ldl_p(ptr);
3050 break;
3052 r = MEMTX_OK;
3054 if (result) {
3055 *result = r;
3057 if (release_lock) {
3058 qemu_mutex_unlock_iothread();
3060 rcu_read_unlock();
3061 return val;
3064 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3065 MemTxAttrs attrs, MemTxResult *result)
3067 return address_space_ldl_internal(as, addr, attrs, result,
3068 DEVICE_NATIVE_ENDIAN);
3071 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3072 MemTxAttrs attrs, MemTxResult *result)
3074 return address_space_ldl_internal(as, addr, attrs, result,
3075 DEVICE_LITTLE_ENDIAN);
3078 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3079 MemTxAttrs attrs, MemTxResult *result)
3081 return address_space_ldl_internal(as, addr, attrs, result,
3082 DEVICE_BIG_ENDIAN);
3085 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3087 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3090 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3092 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3095 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3097 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3100 /* warning: addr must be aligned */
3101 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3102 MemTxAttrs attrs,
3103 MemTxResult *result,
3104 enum device_endian endian)
3106 uint8_t *ptr;
3107 uint64_t val;
3108 MemoryRegion *mr;
3109 hwaddr l = 8;
3110 hwaddr addr1;
3111 MemTxResult r;
3112 bool release_lock = false;
3114 rcu_read_lock();
3115 mr = address_space_translate(as, addr, &addr1, &l,
3116 false);
3117 if (l < 8 || !memory_access_is_direct(mr, false)) {
3118 release_lock |= prepare_mmio_access(mr);
3120 /* I/O case */
3121 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3122 #if defined(TARGET_WORDS_BIGENDIAN)
3123 if (endian == DEVICE_LITTLE_ENDIAN) {
3124 val = bswap64(val);
3126 #else
3127 if (endian == DEVICE_BIG_ENDIAN) {
3128 val = bswap64(val);
3130 #endif
3131 } else {
3132 /* RAM case */
3133 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3134 switch (endian) {
3135 case DEVICE_LITTLE_ENDIAN:
3136 val = ldq_le_p(ptr);
3137 break;
3138 case DEVICE_BIG_ENDIAN:
3139 val = ldq_be_p(ptr);
3140 break;
3141 default:
3142 val = ldq_p(ptr);
3143 break;
3145 r = MEMTX_OK;
3147 if (result) {
3148 *result = r;
3150 if (release_lock) {
3151 qemu_mutex_unlock_iothread();
3153 rcu_read_unlock();
3154 return val;
3157 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3158 MemTxAttrs attrs, MemTxResult *result)
3160 return address_space_ldq_internal(as, addr, attrs, result,
3161 DEVICE_NATIVE_ENDIAN);
3164 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3165 MemTxAttrs attrs, MemTxResult *result)
3167 return address_space_ldq_internal(as, addr, attrs, result,
3168 DEVICE_LITTLE_ENDIAN);
3171 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3172 MemTxAttrs attrs, MemTxResult *result)
3174 return address_space_ldq_internal(as, addr, attrs, result,
3175 DEVICE_BIG_ENDIAN);
3178 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3180 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3183 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3185 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3188 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3190 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3193 /* XXX: optimize */
3194 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3195 MemTxAttrs attrs, MemTxResult *result)
3197 uint8_t val;
3198 MemTxResult r;
3200 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3201 if (result) {
3202 *result = r;
3204 return val;
3207 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3209 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3212 /* warning: addr must be aligned */
3213 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3214 hwaddr addr,
3215 MemTxAttrs attrs,
3216 MemTxResult *result,
3217 enum device_endian endian)
3219 uint8_t *ptr;
3220 uint64_t val;
3221 MemoryRegion *mr;
3222 hwaddr l = 2;
3223 hwaddr addr1;
3224 MemTxResult r;
3225 bool release_lock = false;
3227 rcu_read_lock();
3228 mr = address_space_translate(as, addr, &addr1, &l,
3229 false);
3230 if (l < 2 || !memory_access_is_direct(mr, false)) {
3231 release_lock |= prepare_mmio_access(mr);
3233 /* I/O case */
3234 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3235 #if defined(TARGET_WORDS_BIGENDIAN)
3236 if (endian == DEVICE_LITTLE_ENDIAN) {
3237 val = bswap16(val);
3239 #else
3240 if (endian == DEVICE_BIG_ENDIAN) {
3241 val = bswap16(val);
3243 #endif
3244 } else {
3245 /* RAM case */
3246 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3247 switch (endian) {
3248 case DEVICE_LITTLE_ENDIAN:
3249 val = lduw_le_p(ptr);
3250 break;
3251 case DEVICE_BIG_ENDIAN:
3252 val = lduw_be_p(ptr);
3253 break;
3254 default:
3255 val = lduw_p(ptr);
3256 break;
3258 r = MEMTX_OK;
3260 if (result) {
3261 *result = r;
3263 if (release_lock) {
3264 qemu_mutex_unlock_iothread();
3266 rcu_read_unlock();
3267 return val;
3270 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3271 MemTxAttrs attrs, MemTxResult *result)
3273 return address_space_lduw_internal(as, addr, attrs, result,
3274 DEVICE_NATIVE_ENDIAN);
3277 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3278 MemTxAttrs attrs, MemTxResult *result)
3280 return address_space_lduw_internal(as, addr, attrs, result,
3281 DEVICE_LITTLE_ENDIAN);
3284 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3285 MemTxAttrs attrs, MemTxResult *result)
3287 return address_space_lduw_internal(as, addr, attrs, result,
3288 DEVICE_BIG_ENDIAN);
3291 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3293 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3296 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3298 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3301 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3303 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3306 /* warning: addr must be aligned. The ram page is not masked as dirty
3307 and the code inside is not invalidated. It is useful if the dirty
3308 bits are used to track modified PTEs */
3309 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3310 MemTxAttrs attrs, MemTxResult *result)
3312 uint8_t *ptr;
3313 MemoryRegion *mr;
3314 hwaddr l = 4;
3315 hwaddr addr1;
3316 MemTxResult r;
3317 uint8_t dirty_log_mask;
3318 bool release_lock = false;
3320 rcu_read_lock();
3321 mr = address_space_translate(as, addr, &addr1, &l,
3322 true);
3323 if (l < 4 || !memory_access_is_direct(mr, true)) {
3324 release_lock |= prepare_mmio_access(mr);
3326 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3327 } else {
3328 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3329 stl_p(ptr, val);
3331 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3332 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3333 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3334 4, dirty_log_mask);
3335 r = MEMTX_OK;
3337 if (result) {
3338 *result = r;
3340 if (release_lock) {
3341 qemu_mutex_unlock_iothread();
3343 rcu_read_unlock();
3346 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3348 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3351 /* warning: addr must be aligned */
3352 static inline void address_space_stl_internal(AddressSpace *as,
3353 hwaddr addr, uint32_t val,
3354 MemTxAttrs attrs,
3355 MemTxResult *result,
3356 enum device_endian endian)
3358 uint8_t *ptr;
3359 MemoryRegion *mr;
3360 hwaddr l = 4;
3361 hwaddr addr1;
3362 MemTxResult r;
3363 bool release_lock = false;
3365 rcu_read_lock();
3366 mr = address_space_translate(as, addr, &addr1, &l,
3367 true);
3368 if (l < 4 || !memory_access_is_direct(mr, true)) {
3369 release_lock |= prepare_mmio_access(mr);
3371 #if defined(TARGET_WORDS_BIGENDIAN)
3372 if (endian == DEVICE_LITTLE_ENDIAN) {
3373 val = bswap32(val);
3375 #else
3376 if (endian == DEVICE_BIG_ENDIAN) {
3377 val = bswap32(val);
3379 #endif
3380 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3381 } else {
3382 /* RAM case */
3383 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3384 switch (endian) {
3385 case DEVICE_LITTLE_ENDIAN:
3386 stl_le_p(ptr, val);
3387 break;
3388 case DEVICE_BIG_ENDIAN:
3389 stl_be_p(ptr, val);
3390 break;
3391 default:
3392 stl_p(ptr, val);
3393 break;
3395 invalidate_and_set_dirty(mr, addr1, 4);
3396 r = MEMTX_OK;
3398 if (result) {
3399 *result = r;
3401 if (release_lock) {
3402 qemu_mutex_unlock_iothread();
3404 rcu_read_unlock();
3407 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3408 MemTxAttrs attrs, MemTxResult *result)
3410 address_space_stl_internal(as, addr, val, attrs, result,
3411 DEVICE_NATIVE_ENDIAN);
3414 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3415 MemTxAttrs attrs, MemTxResult *result)
3417 address_space_stl_internal(as, addr, val, attrs, result,
3418 DEVICE_LITTLE_ENDIAN);
3421 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3422 MemTxAttrs attrs, MemTxResult *result)
3424 address_space_stl_internal(as, addr, val, attrs, result,
3425 DEVICE_BIG_ENDIAN);
3428 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3430 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3433 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3435 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3438 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3440 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3443 /* XXX: optimize */
3444 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3445 MemTxAttrs attrs, MemTxResult *result)
3447 uint8_t v = val;
3448 MemTxResult r;
3450 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3451 if (result) {
3452 *result = r;
3456 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3458 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3461 /* warning: addr must be aligned */
3462 static inline void address_space_stw_internal(AddressSpace *as,
3463 hwaddr addr, uint32_t val,
3464 MemTxAttrs attrs,
3465 MemTxResult *result,
3466 enum device_endian endian)
3468 uint8_t *ptr;
3469 MemoryRegion *mr;
3470 hwaddr l = 2;
3471 hwaddr addr1;
3472 MemTxResult r;
3473 bool release_lock = false;
3475 rcu_read_lock();
3476 mr = address_space_translate(as, addr, &addr1, &l, true);
3477 if (l < 2 || !memory_access_is_direct(mr, true)) {
3478 release_lock |= prepare_mmio_access(mr);
3480 #if defined(TARGET_WORDS_BIGENDIAN)
3481 if (endian == DEVICE_LITTLE_ENDIAN) {
3482 val = bswap16(val);
3484 #else
3485 if (endian == DEVICE_BIG_ENDIAN) {
3486 val = bswap16(val);
3488 #endif
3489 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3490 } else {
3491 /* RAM case */
3492 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3493 switch (endian) {
3494 case DEVICE_LITTLE_ENDIAN:
3495 stw_le_p(ptr, val);
3496 break;
3497 case DEVICE_BIG_ENDIAN:
3498 stw_be_p(ptr, val);
3499 break;
3500 default:
3501 stw_p(ptr, val);
3502 break;
3504 invalidate_and_set_dirty(mr, addr1, 2);
3505 r = MEMTX_OK;
3507 if (result) {
3508 *result = r;
3510 if (release_lock) {
3511 qemu_mutex_unlock_iothread();
3513 rcu_read_unlock();
3516 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3517 MemTxAttrs attrs, MemTxResult *result)
3519 address_space_stw_internal(as, addr, val, attrs, result,
3520 DEVICE_NATIVE_ENDIAN);
3523 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3524 MemTxAttrs attrs, MemTxResult *result)
3526 address_space_stw_internal(as, addr, val, attrs, result,
3527 DEVICE_LITTLE_ENDIAN);
3530 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3531 MemTxAttrs attrs, MemTxResult *result)
3533 address_space_stw_internal(as, addr, val, attrs, result,
3534 DEVICE_BIG_ENDIAN);
3537 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3539 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3542 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3544 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3547 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3549 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3552 /* XXX: optimize */
3553 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3554 MemTxAttrs attrs, MemTxResult *result)
3556 MemTxResult r;
3557 val = tswap64(val);
3558 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3559 if (result) {
3560 *result = r;
3564 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3565 MemTxAttrs attrs, MemTxResult *result)
3567 MemTxResult r;
3568 val = cpu_to_le64(val);
3569 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3570 if (result) {
3571 *result = r;
3574 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3575 MemTxAttrs attrs, MemTxResult *result)
3577 MemTxResult r;
3578 val = cpu_to_be64(val);
3579 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3580 if (result) {
3581 *result = r;
3585 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3587 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3590 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3592 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3595 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3597 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3600 /* virtual memory access for debug (includes writing to ROM) */
3601 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3602 uint8_t *buf, int len, int is_write)
3604 int l;
3605 hwaddr phys_addr;
3606 target_ulong page;
3608 while (len > 0) {
3609 int asidx;
3610 MemTxAttrs attrs;
3612 page = addr & TARGET_PAGE_MASK;
3613 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3614 asidx = cpu_asidx_from_attrs(cpu, attrs);
3615 /* if no physical page mapped, return an error */
3616 if (phys_addr == -1)
3617 return -1;
3618 l = (page + TARGET_PAGE_SIZE) - addr;
3619 if (l > len)
3620 l = len;
3621 phys_addr += (addr & ~TARGET_PAGE_MASK);
3622 if (is_write) {
3623 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3624 phys_addr, buf, l);
3625 } else {
3626 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3627 MEMTXATTRS_UNSPECIFIED,
3628 buf, l, 0);
3630 len -= l;
3631 buf += l;
3632 addr += l;
3634 return 0;
3638 * Allows code that needs to deal with migration bitmaps etc to still be built
3639 * target independent.
3641 size_t qemu_target_page_bits(void)
3643 return TARGET_PAGE_BITS;
3646 #endif
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true exactly when TARGET_WORDS_BIGENDIAN is defined at build time.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3662 #ifndef CONFIG_USER_ONLY
3663 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3665 MemoryRegion*mr;
3666 hwaddr l = 1;
3667 bool res;
3669 rcu_read_lock();
3670 mr = address_space_translate(&address_space_memory,
3671 phys_addr, &phys_addr, &l, false);
3673 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3674 rcu_read_unlock();
3675 return res;
3678 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3680 RAMBlock *block;
3681 int ret = 0;
3683 rcu_read_lock();
3684 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3685 ret = func(block->idstr, block->host, block->offset,
3686 block->used_length, opaque);
3687 if (ret) {
3688 break;
3691 rcu_read_unlock();
3692 return ret;
3694 #endif