4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
59 //#define DEBUG_SUBPAGE
61 #if !defined(CONFIG_USER_ONLY)
62 static bool in_migration
;
64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
67 RAMList ram_list
= { .blocks
= QLIST_HEAD_INITIALIZER(ram_list
.blocks
) };
69 static MemoryRegion
*system_memory
;
70 static MemoryRegion
*system_io
;
72 AddressSpace address_space_io
;
73 AddressSpace address_space_memory
;
75 MemoryRegion io_mem_rom
, io_mem_notdirty
;
76 static MemoryRegion io_mem_unassigned
;
78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79 #define RAM_PREALLOC (1 << 0)
81 /* RAM is mmap-ed with MAP_SHARED */
82 #define RAM_SHARED (1 << 1)
84 /* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length size can change across reboots.
87 #define RAM_RESIZEABLE (1 << 2)
91 struct CPUTailQ cpus
= QTAILQ_HEAD_INITIALIZER(cpus
);
92 /* current CPU in the current thread. It is only valid inside
94 DEFINE_TLS(CPUState
*, current_cpu
);
95 /* 0 = Do not count executed instructions.
96 1 = Precise instruction counting.
97 2 = Adaptive rate instruction counting. */
100 #if !defined(CONFIG_USER_ONLY)
102 typedef struct PhysPageEntry PhysPageEntry
;
104 struct PhysPageEntry
{
105 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
113 /* Size of the L2 (and L3, etc) page tables. */
114 #define ADDR_SPACE_BITS 64
117 #define P_L2_SIZE (1 << P_L2_BITS)
119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
121 typedef PhysPageEntry Node
[P_L2_SIZE
];
123 typedef struct PhysPageMap
{
126 unsigned sections_nb
;
127 unsigned sections_nb_alloc
;
129 unsigned nodes_nb_alloc
;
131 MemoryRegionSection
*sections
;
134 struct AddressSpaceDispatch
{
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
140 PhysPageEntry phys_map
;
145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146 typedef struct subpage_t
{
150 uint16_t sub_section
[TARGET_PAGE_SIZE
];
153 #define PHYS_SECTION_UNASSIGNED 0
154 #define PHYS_SECTION_NOTDIRTY 1
155 #define PHYS_SECTION_ROM 2
156 #define PHYS_SECTION_WATCH 3
158 static void io_mem_init(void);
159 static void memory_map_init(void);
160 static void tcg_commit(MemoryListener
*listener
);
162 static MemoryRegion io_mem_watch
;
165 #if !defined(CONFIG_USER_ONLY)
167 static void phys_map_node_reserve(PhysPageMap
*map
, unsigned nodes
)
169 if (map
->nodes_nb
+ nodes
> map
->nodes_nb_alloc
) {
170 map
->nodes_nb_alloc
= MAX(map
->nodes_nb_alloc
* 2, 16);
171 map
->nodes_nb_alloc
= MAX(map
->nodes_nb_alloc
, map
->nodes_nb
+ nodes
);
172 map
->nodes
= g_renew(Node
, map
->nodes
, map
->nodes_nb_alloc
);
176 static uint32_t phys_map_node_alloc(PhysPageMap
*map
, bool leaf
)
183 ret
= map
->nodes_nb
++;
185 assert(ret
!= PHYS_MAP_NODE_NIL
);
186 assert(ret
!= map
->nodes_nb_alloc
);
188 e
.skip
= leaf
? 0 : 1;
189 e
.ptr
= leaf
? PHYS_SECTION_UNASSIGNED
: PHYS_MAP_NODE_NIL
;
190 for (i
= 0; i
< P_L2_SIZE
; ++i
) {
191 memcpy(&p
[i
], &e
, sizeof(e
));
196 static void phys_page_set_level(PhysPageMap
*map
, PhysPageEntry
*lp
,
197 hwaddr
*index
, hwaddr
*nb
, uint16_t leaf
,
201 hwaddr step
= (hwaddr
)1 << (level
* P_L2_BITS
);
203 if (lp
->skip
&& lp
->ptr
== PHYS_MAP_NODE_NIL
) {
204 lp
->ptr
= phys_map_node_alloc(map
, level
== 0);
206 p
= map
->nodes
[lp
->ptr
];
207 lp
= &p
[(*index
>> (level
* P_L2_BITS
)) & (P_L2_SIZE
- 1)];
209 while (*nb
&& lp
< &p
[P_L2_SIZE
]) {
210 if ((*index
& (step
- 1)) == 0 && *nb
>= step
) {
216 phys_page_set_level(map
, lp
, index
, nb
, leaf
, level
- 1);
222 static void phys_page_set(AddressSpaceDispatch
*d
,
223 hwaddr index
, hwaddr nb
,
226 /* Wildly overreserve - it doesn't matter much. */
227 phys_map_node_reserve(&d
->map
, 3 * P_L2_LEVELS
);
229 phys_page_set_level(&d
->map
, &d
->phys_map
, &index
, &nb
, leaf
, P_L2_LEVELS
- 1);
232 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
233 * and update our entry so we can skip it and go directly to the destination.
235 static void phys_page_compact(PhysPageEntry
*lp
, Node
*nodes
, unsigned long *compacted
)
237 unsigned valid_ptr
= P_L2_SIZE
;
242 if (lp
->ptr
== PHYS_MAP_NODE_NIL
) {
247 for (i
= 0; i
< P_L2_SIZE
; i
++) {
248 if (p
[i
].ptr
== PHYS_MAP_NODE_NIL
) {
255 phys_page_compact(&p
[i
], nodes
, compacted
);
259 /* We can only compress if there's only one child. */
264 assert(valid_ptr
< P_L2_SIZE
);
266 /* Don't compress if it won't fit in the # of bits we have. */
267 if (lp
->skip
+ p
[valid_ptr
].skip
>= (1 << 3)) {
271 lp
->ptr
= p
[valid_ptr
].ptr
;
272 if (!p
[valid_ptr
].skip
) {
273 /* If our only child is a leaf, make this a leaf. */
274 /* By design, we should have made this node a leaf to begin with so we
275 * should never reach here.
276 * But since it's so simple to handle this, let's do it just in case we
281 lp
->skip
+= p
[valid_ptr
].skip
;
285 static void phys_page_compact_all(AddressSpaceDispatch
*d
, int nodes_nb
)
287 DECLARE_BITMAP(compacted
, nodes_nb
);
289 if (d
->phys_map
.skip
) {
290 phys_page_compact(&d
->phys_map
, d
->map
.nodes
, compacted
);
294 static MemoryRegionSection
*phys_page_find(PhysPageEntry lp
, hwaddr addr
,
295 Node
*nodes
, MemoryRegionSection
*sections
)
298 hwaddr index
= addr
>> TARGET_PAGE_BITS
;
301 for (i
= P_L2_LEVELS
; lp
.skip
&& (i
-= lp
.skip
) >= 0;) {
302 if (lp
.ptr
== PHYS_MAP_NODE_NIL
) {
303 return §ions
[PHYS_SECTION_UNASSIGNED
];
306 lp
= p
[(index
>> (i
* P_L2_BITS
)) & (P_L2_SIZE
- 1)];
309 if (sections
[lp
.ptr
].size
.hi
||
310 range_covers_byte(sections
[lp
.ptr
].offset_within_address_space
,
311 sections
[lp
.ptr
].size
.lo
, addr
)) {
312 return §ions
[lp
.ptr
];
314 return §ions
[PHYS_SECTION_UNASSIGNED
];
318 bool memory_region_is_unassigned(MemoryRegion
*mr
)
320 return mr
!= &io_mem_rom
&& mr
!= &io_mem_notdirty
&& !mr
->rom_device
321 && mr
!= &io_mem_watch
;
324 /* Called from RCU critical section */
325 static MemoryRegionSection
*address_space_lookup_region(AddressSpaceDispatch
*d
,
327 bool resolve_subpage
)
329 MemoryRegionSection
*section
;
332 section
= phys_page_find(d
->phys_map
, addr
, d
->map
.nodes
, d
->map
.sections
);
333 if (resolve_subpage
&& section
->mr
->subpage
) {
334 subpage
= container_of(section
->mr
, subpage_t
, iomem
);
335 section
= &d
->map
.sections
[subpage
->sub_section
[SUBPAGE_IDX(addr
)]];
340 /* Called from RCU critical section */
341 static MemoryRegionSection
*
342 address_space_translate_internal(AddressSpaceDispatch
*d
, hwaddr addr
, hwaddr
*xlat
,
343 hwaddr
*plen
, bool resolve_subpage
)
345 MemoryRegionSection
*section
;
348 section
= address_space_lookup_region(d
, addr
, resolve_subpage
);
349 /* Compute offset within MemoryRegionSection */
350 addr
-= section
->offset_within_address_space
;
352 /* Compute offset within MemoryRegion */
353 *xlat
= addr
+ section
->offset_within_region
;
355 diff
= int128_sub(section
->mr
->size
, int128_make64(addr
));
356 *plen
= int128_get64(int128_min(diff
, int128_make64(*plen
)));
360 static inline bool memory_access_is_direct(MemoryRegion
*mr
, bool is_write
)
362 if (memory_region_is_ram(mr
)) {
363 return !(is_write
&& mr
->readonly
);
365 if (memory_region_is_romd(mr
)) {
372 /* Called from RCU critical section */
373 MemoryRegion
*address_space_translate(AddressSpace
*as
, hwaddr addr
,
374 hwaddr
*xlat
, hwaddr
*plen
,
378 MemoryRegionSection
*section
;
382 AddressSpaceDispatch
*d
= atomic_rcu_read(&as
->dispatch
);
383 section
= address_space_translate_internal(d
, addr
, &addr
, plen
, true);
386 if (!mr
->iommu_ops
) {
390 iotlb
= mr
->iommu_ops
->translate(mr
, addr
, is_write
);
391 addr
= ((iotlb
.translated_addr
& ~iotlb
.addr_mask
)
392 | (addr
& iotlb
.addr_mask
));
393 *plen
= MIN(*plen
, (addr
| iotlb
.addr_mask
) - addr
+ 1);
394 if (!(iotlb
.perm
& (1 << is_write
))) {
395 mr
= &io_mem_unassigned
;
399 as
= iotlb
.target_as
;
402 if (xen_enabled() && memory_access_is_direct(mr
, is_write
)) {
403 hwaddr page
= ((addr
& TARGET_PAGE_MASK
) + TARGET_PAGE_SIZE
) - addr
;
404 *plen
= MIN(page
, *plen
);
411 /* Called from RCU critical section */
412 MemoryRegionSection
*
413 address_space_translate_for_iotlb(CPUState
*cpu
, hwaddr addr
,
414 hwaddr
*xlat
, hwaddr
*plen
)
416 MemoryRegionSection
*section
;
417 section
= address_space_translate_internal(cpu
->memory_dispatch
,
418 addr
, xlat
, plen
, false);
420 assert(!section
->mr
->iommu_ops
);
425 #if !defined(CONFIG_USER_ONLY)
427 static int cpu_common_post_load(void *opaque
, int version_id
)
429 CPUState
*cpu
= opaque
;
431 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
432 version_id is increased. */
433 cpu
->interrupt_request
&= ~0x01;
439 static int cpu_common_pre_load(void *opaque
)
441 CPUState
*cpu
= opaque
;
443 cpu
->exception_index
= -1;
448 static bool cpu_common_exception_index_needed(void *opaque
)
450 CPUState
*cpu
= opaque
;
452 return tcg_enabled() && cpu
->exception_index
!= -1;
455 static const VMStateDescription vmstate_cpu_common_exception_index
= {
456 .name
= "cpu_common/exception_index",
458 .minimum_version_id
= 1,
459 .fields
= (VMStateField
[]) {
460 VMSTATE_INT32(exception_index
, CPUState
),
461 VMSTATE_END_OF_LIST()
465 const VMStateDescription vmstate_cpu_common
= {
466 .name
= "cpu_common",
468 .minimum_version_id
= 1,
469 .pre_load
= cpu_common_pre_load
,
470 .post_load
= cpu_common_post_load
,
471 .fields
= (VMStateField
[]) {
472 VMSTATE_UINT32(halted
, CPUState
),
473 VMSTATE_UINT32(interrupt_request
, CPUState
),
474 VMSTATE_END_OF_LIST()
476 .subsections
= (VMStateSubsection
[]) {
478 .vmsd
= &vmstate_cpu_common_exception_index
,
479 .needed
= cpu_common_exception_index_needed
,
488 CPUState
*qemu_get_cpu(int index
)
493 if (cpu
->cpu_index
== index
) {
501 #if !defined(CONFIG_USER_ONLY)
502 void tcg_cpu_address_space_init(CPUState
*cpu
, AddressSpace
*as
)
504 /* We only support one address space per cpu at the moment. */
505 assert(cpu
->as
== as
);
507 if (cpu
->tcg_as_listener
) {
508 memory_listener_unregister(cpu
->tcg_as_listener
);
510 cpu
->tcg_as_listener
= g_new0(MemoryListener
, 1);
512 cpu
->tcg_as_listener
->commit
= tcg_commit
;
513 memory_listener_register(cpu
->tcg_as_listener
, as
);
517 void cpu_exec_init(CPUArchState
*env
)
519 CPUState
*cpu
= ENV_GET_CPU(env
);
520 CPUClass
*cc
= CPU_GET_CLASS(cpu
);
524 #if defined(CONFIG_USER_ONLY)
528 CPU_FOREACH(some_cpu
) {
531 cpu
->cpu_index
= cpu_index
;
533 QTAILQ_INIT(&cpu
->breakpoints
);
534 QTAILQ_INIT(&cpu
->watchpoints
);
535 #ifndef CONFIG_USER_ONLY
536 cpu
->as
= &address_space_memory
;
537 cpu
->thread_id
= qemu_get_thread_id();
538 cpu_reload_memory_map(cpu
);
540 QTAILQ_INSERT_TAIL(&cpus
, cpu
, node
);
541 #if defined(CONFIG_USER_ONLY)
544 if (qdev_get_vmsd(DEVICE(cpu
)) == NULL
) {
545 vmstate_register(NULL
, cpu_index
, &vmstate_cpu_common
, cpu
);
547 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
548 register_savevm(NULL
, "cpu", cpu_index
, CPU_SAVE_VERSION
,
549 cpu_save
, cpu_load
, env
);
550 assert(cc
->vmsd
== NULL
);
551 assert(qdev_get_vmsd(DEVICE(cpu
)) == NULL
);
553 if (cc
->vmsd
!= NULL
) {
554 vmstate_register(NULL
, cpu_index
, cc
->vmsd
, cpu
);
558 #if defined(CONFIG_USER_ONLY)
559 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
561 tb_invalidate_phys_page_range(pc
, pc
+ 1, 0);
564 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
566 hwaddr phys
= cpu_get_phys_page_debug(cpu
, pc
);
568 tb_invalidate_phys_addr(cpu
->as
,
569 phys
| (pc
& ~TARGET_PAGE_MASK
));
574 #if defined(CONFIG_USER_ONLY)
575 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
580 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
586 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
590 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
591 int flags
, CPUWatchpoint
**watchpoint
)
596 /* Add a watchpoint. */
597 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
598 int flags
, CPUWatchpoint
**watchpoint
)
602 /* forbid ranges which are empty or run off the end of the address space */
603 if (len
== 0 || (addr
+ len
- 1) < addr
) {
604 error_report("tried to set invalid watchpoint at %"
605 VADDR_PRIx
", len=%" VADDR_PRIu
, addr
, len
);
608 wp
= g_malloc(sizeof(*wp
));
614 /* keep all GDB-injected watchpoints in front */
615 if (flags
& BP_GDB
) {
616 QTAILQ_INSERT_HEAD(&cpu
->watchpoints
, wp
, entry
);
618 QTAILQ_INSERT_TAIL(&cpu
->watchpoints
, wp
, entry
);
621 tlb_flush_page(cpu
, addr
);
628 /* Remove a specific watchpoint. */
629 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
634 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
635 if (addr
== wp
->vaddr
&& len
== wp
->len
636 && flags
== (wp
->flags
& ~BP_WATCHPOINT_HIT
)) {
637 cpu_watchpoint_remove_by_ref(cpu
, wp
);
644 /* Remove a specific watchpoint by reference. */
645 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
647 QTAILQ_REMOVE(&cpu
->watchpoints
, watchpoint
, entry
);
649 tlb_flush_page(cpu
, watchpoint
->vaddr
);
654 /* Remove all matching watchpoints. */
655 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
657 CPUWatchpoint
*wp
, *next
;
659 QTAILQ_FOREACH_SAFE(wp
, &cpu
->watchpoints
, entry
, next
) {
660 if (wp
->flags
& mask
) {
661 cpu_watchpoint_remove_by_ref(cpu
, wp
);
666 /* Return true if this watchpoint address matches the specified
667 * access (ie the address range covered by the watchpoint overlaps
668 * partially or completely with the address range covered by the
671 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint
*wp
,
675 /* We know the lengths are non-zero, but a little caution is
676 * required to avoid errors in the case where the range ends
677 * exactly at the top of the address space and so addr + len
678 * wraps round to zero.
680 vaddr wpend
= wp
->vaddr
+ wp
->len
- 1;
681 vaddr addrend
= addr
+ len
- 1;
683 return !(addr
> wpend
|| wp
->vaddr
> addrend
);
688 /* Add a breakpoint. */
689 int cpu_breakpoint_insert(CPUState
*cpu
, vaddr pc
, int flags
,
690 CPUBreakpoint
**breakpoint
)
694 bp
= g_malloc(sizeof(*bp
));
699 /* keep all GDB-injected breakpoints in front */
700 if (flags
& BP_GDB
) {
701 QTAILQ_INSERT_HEAD(&cpu
->breakpoints
, bp
, entry
);
703 QTAILQ_INSERT_TAIL(&cpu
->breakpoints
, bp
, entry
);
706 breakpoint_invalidate(cpu
, pc
);
714 /* Remove a specific breakpoint. */
715 int cpu_breakpoint_remove(CPUState
*cpu
, vaddr pc
, int flags
)
719 QTAILQ_FOREACH(bp
, &cpu
->breakpoints
, entry
) {
720 if (bp
->pc
== pc
&& bp
->flags
== flags
) {
721 cpu_breakpoint_remove_by_ref(cpu
, bp
);
728 /* Remove a specific breakpoint by reference. */
729 void cpu_breakpoint_remove_by_ref(CPUState
*cpu
, CPUBreakpoint
*breakpoint
)
731 QTAILQ_REMOVE(&cpu
->breakpoints
, breakpoint
, entry
);
733 breakpoint_invalidate(cpu
, breakpoint
->pc
);
738 /* Remove all matching breakpoints. */
739 void cpu_breakpoint_remove_all(CPUState
*cpu
, int mask
)
741 CPUBreakpoint
*bp
, *next
;
743 QTAILQ_FOREACH_SAFE(bp
, &cpu
->breakpoints
, entry
, next
) {
744 if (bp
->flags
& mask
) {
745 cpu_breakpoint_remove_by_ref(cpu
, bp
);
750 /* enable or disable single step mode. EXCP_DEBUG is returned by the
751 CPU loop after each instruction */
752 void cpu_single_step(CPUState
*cpu
, int enabled
)
754 if (cpu
->singlestep_enabled
!= enabled
) {
755 cpu
->singlestep_enabled
= enabled
;
757 kvm_update_guest_debug(cpu
, 0);
759 /* must flush all the translated code to avoid inconsistencies */
760 /* XXX: only flush what is necessary */
761 CPUArchState
*env
= cpu
->env_ptr
;
767 void cpu_abort(CPUState
*cpu
, const char *fmt
, ...)
774 fprintf(stderr
, "qemu: fatal: ");
775 vfprintf(stderr
, fmt
, ap
);
776 fprintf(stderr
, "\n");
777 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
778 if (qemu_log_enabled()) {
779 qemu_log("qemu: fatal: ");
780 qemu_log_vprintf(fmt
, ap2
);
782 log_cpu_state(cpu
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
788 #if defined(CONFIG_USER_ONLY)
790 struct sigaction act
;
791 sigfillset(&act
.sa_mask
);
792 act
.sa_handler
= SIG_DFL
;
793 sigaction(SIGABRT
, &act
, NULL
);
799 #if !defined(CONFIG_USER_ONLY)
800 /* Called from RCU critical section */
801 static RAMBlock
*qemu_get_ram_block(ram_addr_t addr
)
805 block
= atomic_rcu_read(&ram_list
.mru_block
);
806 if (block
&& addr
- block
->offset
< block
->max_length
) {
809 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
810 if (addr
- block
->offset
< block
->max_length
) {
815 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
819 /* It is safe to write mru_block outside the iothread lock. This
824 * xxx removed from list
828 * call_rcu(reclaim_ramblock, xxx);
831 * atomic_rcu_set is not needed here. The block was already published
832 * when it was placed into the list. Here we're just making an extra
833 * copy of the pointer.
835 ram_list
.mru_block
= block
;
839 static void tlb_reset_dirty_range_all(ram_addr_t start
, ram_addr_t length
)
845 end
= TARGET_PAGE_ALIGN(start
+ length
);
846 start
&= TARGET_PAGE_MASK
;
849 block
= qemu_get_ram_block(start
);
850 assert(block
== qemu_get_ram_block(end
- 1));
851 start1
= (uintptr_t)ramblock_ptr(block
, start
- block
->offset
);
852 cpu_tlb_reset_dirty_all(start1
, length
);
856 /* Note: start and end must be within the same ram block. */
857 void cpu_physical_memory_reset_dirty(ram_addr_t start
, ram_addr_t length
,
862 cpu_physical_memory_clear_dirty_range_type(start
, length
, client
);
865 tlb_reset_dirty_range_all(start
, length
);
869 static void cpu_physical_memory_set_dirty_tracking(bool enable
)
871 in_migration
= enable
;
874 /* Called from RCU critical section */
875 hwaddr
memory_region_section_get_iotlb(CPUState
*cpu
,
876 MemoryRegionSection
*section
,
878 hwaddr paddr
, hwaddr xlat
,
880 target_ulong
*address
)
885 if (memory_region_is_ram(section
->mr
)) {
887 iotlb
= (memory_region_get_ram_addr(section
->mr
) & TARGET_PAGE_MASK
)
889 if (!section
->readonly
) {
890 iotlb
|= PHYS_SECTION_NOTDIRTY
;
892 iotlb
|= PHYS_SECTION_ROM
;
895 iotlb
= section
- section
->address_space
->dispatch
->map
.sections
;
899 /* Make accesses to pages with watchpoints go via the
900 watchpoint trap routines. */
901 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
902 if (cpu_watchpoint_address_matches(wp
, vaddr
, TARGET_PAGE_SIZE
)) {
903 /* Avoid trapping reads of pages with a write breakpoint. */
904 if ((prot
& PAGE_WRITE
) || (wp
->flags
& BP_MEM_READ
)) {
905 iotlb
= PHYS_SECTION_WATCH
+ paddr
;
906 *address
|= TLB_MMIO
;
914 #endif /* defined(CONFIG_USER_ONLY) */
916 #if !defined(CONFIG_USER_ONLY)
918 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
920 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
);
922 static void *(*phys_mem_alloc
)(size_t size
, uint64_t *align
) =
926 * Set a custom physical guest memory alloator.
927 * Accelerators with unusual needs may need this. Hopefully, we can
928 * get rid of it eventually.
930 void phys_mem_set_alloc(void *(*alloc
)(size_t, uint64_t *align
))
932 phys_mem_alloc
= alloc
;
935 static uint16_t phys_section_add(PhysPageMap
*map
,
936 MemoryRegionSection
*section
)
938 /* The physical section number is ORed with a page-aligned
939 * pointer to produce the iotlb entries. Thus it should
940 * never overflow into the page-aligned value.
942 assert(map
->sections_nb
< TARGET_PAGE_SIZE
);
944 if (map
->sections_nb
== map
->sections_nb_alloc
) {
945 map
->sections_nb_alloc
= MAX(map
->sections_nb_alloc
* 2, 16);
946 map
->sections
= g_renew(MemoryRegionSection
, map
->sections
,
947 map
->sections_nb_alloc
);
949 map
->sections
[map
->sections_nb
] = *section
;
950 memory_region_ref(section
->mr
);
951 return map
->sections_nb
++;
954 static void phys_section_destroy(MemoryRegion
*mr
)
956 memory_region_unref(mr
);
959 subpage_t
*subpage
= container_of(mr
, subpage_t
, iomem
);
960 object_unref(OBJECT(&subpage
->iomem
));
965 static void phys_sections_free(PhysPageMap
*map
)
967 while (map
->sections_nb
> 0) {
968 MemoryRegionSection
*section
= &map
->sections
[--map
->sections_nb
];
969 phys_section_destroy(section
->mr
);
971 g_free(map
->sections
);
975 static void register_subpage(AddressSpaceDispatch
*d
, MemoryRegionSection
*section
)
978 hwaddr base
= section
->offset_within_address_space
980 MemoryRegionSection
*existing
= phys_page_find(d
->phys_map
, base
,
981 d
->map
.nodes
, d
->map
.sections
);
982 MemoryRegionSection subsection
= {
983 .offset_within_address_space
= base
,
984 .size
= int128_make64(TARGET_PAGE_SIZE
),
988 assert(existing
->mr
->subpage
|| existing
->mr
== &io_mem_unassigned
);
990 if (!(existing
->mr
->subpage
)) {
991 subpage
= subpage_init(d
->as
, base
);
992 subsection
.address_space
= d
->as
;
993 subsection
.mr
= &subpage
->iomem
;
994 phys_page_set(d
, base
>> TARGET_PAGE_BITS
, 1,
995 phys_section_add(&d
->map
, &subsection
));
997 subpage
= container_of(existing
->mr
, subpage_t
, iomem
);
999 start
= section
->offset_within_address_space
& ~TARGET_PAGE_MASK
;
1000 end
= start
+ int128_get64(section
->size
) - 1;
1001 subpage_register(subpage
, start
, end
,
1002 phys_section_add(&d
->map
, section
));
1006 static void register_multipage(AddressSpaceDispatch
*d
,
1007 MemoryRegionSection
*section
)
1009 hwaddr start_addr
= section
->offset_within_address_space
;
1010 uint16_t section_index
= phys_section_add(&d
->map
, section
);
1011 uint64_t num_pages
= int128_get64(int128_rshift(section
->size
,
1015 phys_page_set(d
, start_addr
>> TARGET_PAGE_BITS
, num_pages
, section_index
);
1018 static void mem_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1020 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
1021 AddressSpaceDispatch
*d
= as
->next_dispatch
;
1022 MemoryRegionSection now
= *section
, remain
= *section
;
1023 Int128 page_size
= int128_make64(TARGET_PAGE_SIZE
);
1025 if (now
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1026 uint64_t left
= TARGET_PAGE_ALIGN(now
.offset_within_address_space
)
1027 - now
.offset_within_address_space
;
1029 now
.size
= int128_min(int128_make64(left
), now
.size
);
1030 register_subpage(d
, &now
);
1032 now
.size
= int128_zero();
1034 while (int128_ne(remain
.size
, now
.size
)) {
1035 remain
.size
= int128_sub(remain
.size
, now
.size
);
1036 remain
.offset_within_address_space
+= int128_get64(now
.size
);
1037 remain
.offset_within_region
+= int128_get64(now
.size
);
1039 if (int128_lt(remain
.size
, page_size
)) {
1040 register_subpage(d
, &now
);
1041 } else if (remain
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1042 now
.size
= page_size
;
1043 register_subpage(d
, &now
);
1045 now
.size
= int128_and(now
.size
, int128_neg(page_size
));
1046 register_multipage(d
, &now
);
1051 void qemu_flush_coalesced_mmio_buffer(void)
1054 kvm_flush_coalesced_mmio_buffer();
1057 void qemu_mutex_lock_ramlist(void)
1059 qemu_mutex_lock(&ram_list
.mutex
);
1062 void qemu_mutex_unlock_ramlist(void)
1064 qemu_mutex_unlock(&ram_list
.mutex
);
1069 #include <sys/vfs.h>
1071 #define HUGETLBFS_MAGIC 0x958458f6
1073 static long gethugepagesize(const char *path
, Error
**errp
)
1079 ret
= statfs(path
, &fs
);
1080 } while (ret
!= 0 && errno
== EINTR
);
1083 error_setg_errno(errp
, errno
, "failed to get page size of file %s",
1088 if (fs
.f_type
!= HUGETLBFS_MAGIC
)
1089 fprintf(stderr
, "Warning: path not on HugeTLBFS: %s\n", path
);
1094 static void *file_ram_alloc(RAMBlock
*block
,
1100 char *sanitized_name
;
1105 Error
*local_err
= NULL
;
1107 hpagesize
= gethugepagesize(path
, &local_err
);
1109 error_propagate(errp
, local_err
);
1112 block
->mr
->align
= hpagesize
;
1114 if (memory
< hpagesize
) {
1115 error_setg(errp
, "memory size 0x" RAM_ADDR_FMT
" must be equal to "
1116 "or larger than huge page size 0x%" PRIx64
,
1121 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1123 "host lacks kvm mmu notifiers, -mem-path unsupported");
1127 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1128 sanitized_name
= g_strdup(memory_region_name(block
->mr
));
1129 for (c
= sanitized_name
; *c
!= '\0'; c
++) {
1134 filename
= g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path
,
1136 g_free(sanitized_name
);
1138 fd
= mkstemp(filename
);
1140 error_setg_errno(errp
, errno
,
1141 "unable to create backing store for hugepages");
1148 memory
= (memory
+hpagesize
-1) & ~(hpagesize
-1);
1151 * ftruncate is not supported by hugetlbfs in older
1152 * hosts, so don't bother bailing out on errors.
1153 * If anything goes wrong with it under other filesystems,
1156 if (ftruncate(fd
, memory
)) {
1157 perror("ftruncate");
1160 area
= mmap(0, memory
, PROT_READ
| PROT_WRITE
,
1161 (block
->flags
& RAM_SHARED
? MAP_SHARED
: MAP_PRIVATE
),
1163 if (area
== MAP_FAILED
) {
1164 error_setg_errno(errp
, errno
,
1165 "unable to map backing store for hugepages");
1171 os_mem_prealloc(fd
, area
, memory
);
1179 error_report("%s", error_get_pretty(*errp
));
1186 /* Called with the ramlist lock held. */
1187 static ram_addr_t
find_ram_offset(ram_addr_t size
)
1189 RAMBlock
*block
, *next_block
;
1190 ram_addr_t offset
= RAM_ADDR_MAX
, mingap
= RAM_ADDR_MAX
;
1192 assert(size
!= 0); /* it would hand out same offset multiple times */
1194 if (QLIST_EMPTY_RCU(&ram_list
.blocks
)) {
1198 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1199 ram_addr_t end
, next
= RAM_ADDR_MAX
;
1201 end
= block
->offset
+ block
->max_length
;
1203 QLIST_FOREACH_RCU(next_block
, &ram_list
.blocks
, next
) {
1204 if (next_block
->offset
>= end
) {
1205 next
= MIN(next
, next_block
->offset
);
1208 if (next
- end
>= size
&& next
- end
< mingap
) {
1210 mingap
= next
- end
;
1214 if (offset
== RAM_ADDR_MAX
) {
1215 fprintf(stderr
, "Failed to find gap of requested size: %" PRIu64
"\n",
1223 ram_addr_t
last_ram_offset(void)
1226 ram_addr_t last
= 0;
1229 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1230 last
= MAX(last
, block
->offset
+ block
->max_length
);
1236 static void qemu_ram_setup_dump(void *addr
, ram_addr_t size
)
1240 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1241 if (!machine_dump_guest_core(current_machine
)) {
1242 ret
= qemu_madvise(addr
, size
, QEMU_MADV_DONTDUMP
);
1244 perror("qemu_madvise");
1245 fprintf(stderr
, "madvise doesn't support MADV_DONTDUMP, "
1246 "but dump_guest_core=off specified\n");
1251 /* Called within an RCU critical section, or while the ramlist lock
1254 static RAMBlock
*find_ram_block(ram_addr_t addr
)
1258 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1259 if (block
->offset
== addr
) {
1267 /* Called with iothread lock held. */
1268 void qemu_ram_set_idstr(ram_addr_t addr
, const char *name
, DeviceState
*dev
)
1270 RAMBlock
*new_block
, *block
;
1273 new_block
= find_ram_block(addr
);
1275 assert(!new_block
->idstr
[0]);
1278 char *id
= qdev_get_dev_path(dev
);
1280 snprintf(new_block
->idstr
, sizeof(new_block
->idstr
), "%s/", id
);
1284 pstrcat(new_block
->idstr
, sizeof(new_block
->idstr
), name
);
1286 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1287 if (block
!= new_block
&& !strcmp(block
->idstr
, new_block
->idstr
)) {
1288 fprintf(stderr
, "RAMBlock \"%s\" already registered, abort!\n",
1296 /* Called with iothread lock held. */
1297 void qemu_ram_unset_idstr(ram_addr_t addr
)
1301 /* FIXME: arch_init.c assumes that this is not called throughout
1302 * migration. Ignore the problem since hot-unplug during migration
1303 * does not work anyway.
1307 block
= find_ram_block(addr
);
1309 memset(block
->idstr
, 0, sizeof(block
->idstr
));
1314 static int memory_try_enable_merging(void *addr
, size_t len
)
1316 if (!machine_mem_merge(current_machine
)) {
1317 /* disabled by the user */
1321 return qemu_madvise(addr
, len
, QEMU_MADV_MERGEABLE
);
1324 /* Only legal before guest might have detected the memory size: e.g. on
1325 * incoming migration, or right after reset.
1327 * As memory core doesn't know how is memory accessed, it is up to
1328 * resize callback to update device state and/or add assertions to detect
1329 * misuse, if necessary.
1331 int qemu_ram_resize(ram_addr_t base
, ram_addr_t newsize
, Error
**errp
)
1333 RAMBlock
*block
= find_ram_block(base
);
1337 newsize
= TARGET_PAGE_ALIGN(newsize
);
1339 if (block
->used_length
== newsize
) {
1343 if (!(block
->flags
& RAM_RESIZEABLE
)) {
1344 error_setg_errno(errp
, EINVAL
,
1345 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1346 " in != 0x" RAM_ADDR_FMT
, block
->idstr
,
1347 newsize
, block
->used_length
);
1351 if (block
->max_length
< newsize
) {
1352 error_setg_errno(errp
, EINVAL
,
1353 "Length too large: %s: 0x" RAM_ADDR_FMT
1354 " > 0x" RAM_ADDR_FMT
, block
->idstr
,
1355 newsize
, block
->max_length
);
1359 cpu_physical_memory_clear_dirty_range(block
->offset
, block
->used_length
);
1360 block
->used_length
= newsize
;
1361 cpu_physical_memory_set_dirty_range(block
->offset
, block
->used_length
);
1362 memory_region_set_size(block
->mr
, newsize
);
1363 if (block
->resized
) {
1364 block
->resized(block
->idstr
, newsize
, block
->host
);
1369 static ram_addr_t
ram_block_add(RAMBlock
*new_block
, Error
**errp
)
1372 RAMBlock
*last_block
= NULL
;
1373 ram_addr_t old_ram_size
, new_ram_size
;
1375 old_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1377 qemu_mutex_lock_ramlist();
1378 new_block
->offset
= find_ram_offset(new_block
->max_length
);
1380 if (!new_block
->host
) {
1381 if (xen_enabled()) {
1382 xen_ram_alloc(new_block
->offset
, new_block
->max_length
,
1385 new_block
->host
= phys_mem_alloc(new_block
->max_length
,
1386 &new_block
->mr
->align
);
1387 if (!new_block
->host
) {
1388 error_setg_errno(errp
, errno
,
1389 "cannot set up guest memory '%s'",
1390 memory_region_name(new_block
->mr
));
1391 qemu_mutex_unlock_ramlist();
1394 memory_try_enable_merging(new_block
->host
, new_block
->max_length
);
1398 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1399 * QLIST (which has an RCU-friendly variant) does not have insertion at
1400 * tail, so save the last element in last_block.
1402 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1404 if (block
->max_length
< new_block
->max_length
) {
1409 QLIST_INSERT_BEFORE_RCU(block
, new_block
, next
);
1410 } else if (last_block
) {
1411 QLIST_INSERT_AFTER_RCU(last_block
, new_block
, next
);
1412 } else { /* list is empty */
1413 QLIST_INSERT_HEAD_RCU(&ram_list
.blocks
, new_block
, next
);
1415 ram_list
.mru_block
= NULL
;
1417 /* Write list before version */
1420 qemu_mutex_unlock_ramlist();
1422 new_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1424 if (new_ram_size
> old_ram_size
) {
1427 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1428 for (i
= 0; i
< DIRTY_MEMORY_NUM
; i
++) {
1429 ram_list
.dirty_memory
[i
] =
1430 bitmap_zero_extend(ram_list
.dirty_memory
[i
],
1431 old_ram_size
, new_ram_size
);
1434 cpu_physical_memory_set_dirty_range(new_block
->offset
,
1435 new_block
->used_length
);
1437 if (new_block
->host
) {
1438 qemu_ram_setup_dump(new_block
->host
, new_block
->max_length
);
1439 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_HUGEPAGE
);
1440 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_DONTFORK
);
1441 if (kvm_enabled()) {
1442 kvm_setup_guest_memory(new_block
->host
, new_block
->max_length
);
1446 return new_block
->offset
;
1450 ram_addr_t
qemu_ram_alloc_from_file(ram_addr_t size
, MemoryRegion
*mr
,
1451 bool share
, const char *mem_path
,
1454 RAMBlock
*new_block
;
1456 Error
*local_err
= NULL
;
1458 if (xen_enabled()) {
1459 error_setg(errp
, "-mem-path not supported with Xen");
1463 if (phys_mem_alloc
!= qemu_anon_ram_alloc
) {
1465 * file_ram_alloc() needs to allocate just like
1466 * phys_mem_alloc, but we haven't bothered to provide
1470 "-mem-path not supported with this accelerator");
1474 size
= TARGET_PAGE_ALIGN(size
);
1475 new_block
= g_malloc0(sizeof(*new_block
));
1477 new_block
->used_length
= size
;
1478 new_block
->max_length
= size
;
1479 new_block
->flags
= share
? RAM_SHARED
: 0;
1480 new_block
->host
= file_ram_alloc(new_block
, size
,
1482 if (!new_block
->host
) {
1487 addr
= ram_block_add(new_block
, &local_err
);
1490 error_propagate(errp
, local_err
);
1498 ram_addr_t
qemu_ram_alloc_internal(ram_addr_t size
, ram_addr_t max_size
,
1499 void (*resized
)(const char*,
1502 void *host
, bool resizeable
,
1503 MemoryRegion
*mr
, Error
**errp
)
1505 RAMBlock
*new_block
;
1507 Error
*local_err
= NULL
;
1509 size
= TARGET_PAGE_ALIGN(size
);
1510 max_size
= TARGET_PAGE_ALIGN(max_size
);
1511 new_block
= g_malloc0(sizeof(*new_block
));
1513 new_block
->resized
= resized
;
1514 new_block
->used_length
= size
;
1515 new_block
->max_length
= max_size
;
1516 assert(max_size
>= size
);
1518 new_block
->host
= host
;
1520 new_block
->flags
|= RAM_PREALLOC
;
1523 new_block
->flags
|= RAM_RESIZEABLE
;
1525 addr
= ram_block_add(new_block
, &local_err
);
1528 error_propagate(errp
, local_err
);
1534 ram_addr_t
qemu_ram_alloc_from_ptr(ram_addr_t size
, void *host
,
1535 MemoryRegion
*mr
, Error
**errp
)
1537 return qemu_ram_alloc_internal(size
, size
, NULL
, host
, false, mr
, errp
);
1540 ram_addr_t
qemu_ram_alloc(ram_addr_t size
, MemoryRegion
*mr
, Error
**errp
)
1542 return qemu_ram_alloc_internal(size
, size
, NULL
, NULL
, false, mr
, errp
);
1545 ram_addr_t
qemu_ram_alloc_resizeable(ram_addr_t size
, ram_addr_t maxsz
,
1546 void (*resized
)(const char*,
1549 MemoryRegion
*mr
, Error
**errp
)
1551 return qemu_ram_alloc_internal(size
, maxsz
, resized
, NULL
, true, mr
, errp
);
1554 void qemu_ram_free_from_ptr(ram_addr_t addr
)
1558 qemu_mutex_lock_ramlist();
1559 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1560 if (addr
== block
->offset
) {
1561 QLIST_REMOVE_RCU(block
, next
);
1562 ram_list
.mru_block
= NULL
;
1563 /* Write list before version */
1566 g_free_rcu(block
, rcu
);
1570 qemu_mutex_unlock_ramlist();
1573 static void reclaim_ramblock(RAMBlock
*block
)
1575 if (block
->flags
& RAM_PREALLOC
) {
1577 } else if (xen_enabled()) {
1578 xen_invalidate_map_cache_entry(block
->host
);
1580 } else if (block
->fd
>= 0) {
1581 munmap(block
->host
, block
->max_length
);
1585 qemu_anon_ram_free(block
->host
, block
->max_length
);
1590 void qemu_ram_free(ram_addr_t addr
)
1594 qemu_mutex_lock_ramlist();
1595 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1596 if (addr
== block
->offset
) {
1597 QLIST_REMOVE_RCU(block
, next
);
1598 ram_list
.mru_block
= NULL
;
1599 /* Write list before version */
1602 call_rcu(block
, reclaim_ramblock
, rcu
);
1606 qemu_mutex_unlock_ramlist();
1610 void qemu_ram_remap(ram_addr_t addr
, ram_addr_t length
)
1617 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1618 offset
= addr
- block
->offset
;
1619 if (offset
< block
->max_length
) {
1620 vaddr
= ramblock_ptr(block
, offset
);
1621 if (block
->flags
& RAM_PREALLOC
) {
1623 } else if (xen_enabled()) {
1627 if (block
->fd
>= 0) {
1628 flags
|= (block
->flags
& RAM_SHARED
?
1629 MAP_SHARED
: MAP_PRIVATE
);
1630 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1631 flags
, block
->fd
, offset
);
1634 * Remap needs to match alloc. Accelerators that
1635 * set phys_mem_alloc never remap. If they did,
1636 * we'd need a remap hook here.
1638 assert(phys_mem_alloc
== qemu_anon_ram_alloc
);
1640 flags
|= MAP_PRIVATE
| MAP_ANONYMOUS
;
1641 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1644 if (area
!= vaddr
) {
1645 fprintf(stderr
, "Could not remap addr: "
1646 RAM_ADDR_FMT
"@" RAM_ADDR_FMT
"\n",
1650 memory_try_enable_merging(vaddr
, length
);
1651 qemu_ram_setup_dump(vaddr
, length
);
1656 #endif /* !_WIN32 */
1658 int qemu_get_ram_fd(ram_addr_t addr
)
1664 block
= qemu_get_ram_block(addr
);
1670 void *qemu_get_ram_block_host_ptr(ram_addr_t addr
)
1676 block
= qemu_get_ram_block(addr
);
1677 ptr
= ramblock_ptr(block
, 0);
1682 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1683 * This should not be used for general purpose DMA. Use address_space_map
1684 * or address_space_rw instead. For local memory (e.g. video ram) that the
1685 * device owns, use memory_region_get_ram_ptr.
1687 * By the time this function returns, the returned pointer is not protected
1688 * by RCU anymore. If the caller is not within an RCU critical section and
1689 * does not hold the iothread lock, it must have other means of protecting the
1690 * pointer, such as a reference to the region that includes the incoming
1693 void *qemu_get_ram_ptr(ram_addr_t addr
)
1699 block
= qemu_get_ram_block(addr
);
1701 if (xen_enabled() && block
->host
== NULL
) {
1702 /* We need to check if the requested address is in the RAM
1703 * because we don't want to map the entire memory in QEMU.
1704 * In that case just map until the end of the page.
1706 if (block
->offset
== 0) {
1707 ptr
= xen_map_cache(addr
, 0, 0);
1711 block
->host
= xen_map_cache(block
->offset
, block
->max_length
, 1);
1713 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1720 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1721 * but takes a size argument.
1723 * By the time this function returns, the returned pointer is not protected
1724 * by RCU anymore. If the caller is not within an RCU critical section and
1725 * does not hold the iothread lock, it must have other means of protecting the
1726 * pointer, such as a reference to the region that includes the incoming
1729 static void *qemu_ram_ptr_length(ram_addr_t addr
, hwaddr
*size
)
1735 if (xen_enabled()) {
1736 return xen_map_cache(addr
, *size
, 1);
1740 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1741 if (addr
- block
->offset
< block
->max_length
) {
1742 if (addr
- block
->offset
+ *size
> block
->max_length
)
1743 *size
= block
->max_length
- addr
+ block
->offset
;
1744 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1750 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
1755 /* Some of the softmmu routines need to translate from a host pointer
1756 * (typically a TLB entry) back to a ram offset.
1758 * By the time this function returns, the returned pointer is not protected
1759 * by RCU anymore. If the caller is not within an RCU critical section and
1760 * does not hold the iothread lock, it must have other means of protecting the
1761 * pointer, such as a reference to the region that includes the incoming
1764 MemoryRegion
*qemu_ram_addr_from_host(void *ptr
, ram_addr_t
*ram_addr
)
1767 uint8_t *host
= ptr
;
1770 if (xen_enabled()) {
1772 *ram_addr
= xen_ram_addr_from_mapcache(ptr
);
1773 mr
= qemu_get_ram_block(*ram_addr
)->mr
;
1779 block
= atomic_rcu_read(&ram_list
.mru_block
);
1780 if (block
&& block
->host
&& host
- block
->host
< block
->max_length
) {
1784 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1785 /* This case append when the block is not mapped. */
1786 if (block
->host
== NULL
) {
1789 if (host
- block
->host
< block
->max_length
) {
1798 *ram_addr
= block
->offset
+ (host
- block
->host
);
1804 static void notdirty_mem_write(void *opaque
, hwaddr ram_addr
,
1805 uint64_t val
, unsigned size
)
1807 if (!cpu_physical_memory_get_dirty_flag(ram_addr
, DIRTY_MEMORY_CODE
)) {
1808 tb_invalidate_phys_page_fast(ram_addr
, size
);
1812 stb_p(qemu_get_ram_ptr(ram_addr
), val
);
1815 stw_p(qemu_get_ram_ptr(ram_addr
), val
);
1818 stl_p(qemu_get_ram_ptr(ram_addr
), val
);
1823 cpu_physical_memory_set_dirty_range_nocode(ram_addr
, size
);
1824 /* we remove the notdirty callback only if the code has been
1826 if (!cpu_physical_memory_is_clean(ram_addr
)) {
1827 CPUArchState
*env
= current_cpu
->env_ptr
;
1828 tlb_set_dirty(env
, current_cpu
->mem_io_vaddr
);
1832 static bool notdirty_mem_accepts(void *opaque
, hwaddr addr
,
1833 unsigned size
, bool is_write
)
1838 static const MemoryRegionOps notdirty_mem_ops
= {
1839 .write
= notdirty_mem_write
,
1840 .valid
.accepts
= notdirty_mem_accepts
,
1841 .endianness
= DEVICE_NATIVE_ENDIAN
,
1844 /* Generate a debug exception if a watchpoint has been hit. */
1845 static void check_watchpoint(int offset
, int len
, MemTxAttrs attrs
, int flags
)
1847 CPUState
*cpu
= current_cpu
;
1848 CPUArchState
*env
= cpu
->env_ptr
;
1849 target_ulong pc
, cs_base
;
1854 if (cpu
->watchpoint_hit
) {
1855 /* We re-entered the check after replacing the TB. Now raise
1856 * the debug interrupt so that is will trigger after the
1857 * current instruction. */
1858 cpu_interrupt(cpu
, CPU_INTERRUPT_DEBUG
);
1861 vaddr
= (cpu
->mem_io_vaddr
& TARGET_PAGE_MASK
) + offset
;
1862 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
1863 if (cpu_watchpoint_address_matches(wp
, vaddr
, len
)
1864 && (wp
->flags
& flags
)) {
1865 if (flags
== BP_MEM_READ
) {
1866 wp
->flags
|= BP_WATCHPOINT_HIT_READ
;
1868 wp
->flags
|= BP_WATCHPOINT_HIT_WRITE
;
1870 wp
->hitaddr
= vaddr
;
1871 wp
->hitattrs
= attrs
;
1872 if (!cpu
->watchpoint_hit
) {
1873 cpu
->watchpoint_hit
= wp
;
1874 tb_check_watchpoint(cpu
);
1875 if (wp
->flags
& BP_STOP_BEFORE_ACCESS
) {
1876 cpu
->exception_index
= EXCP_DEBUG
;
1879 cpu_get_tb_cpu_state(env
, &pc
, &cs_base
, &cpu_flags
);
1880 tb_gen_code(cpu
, pc
, cs_base
, cpu_flags
, 1);
1881 cpu_resume_from_signal(cpu
, NULL
);
1885 wp
->flags
&= ~BP_WATCHPOINT_HIT
;
1890 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1891 so these check for a hit then pass through to the normal out-of-line
1893 static MemTxResult
watch_mem_read(void *opaque
, hwaddr addr
, uint64_t *pdata
,
1894 unsigned size
, MemTxAttrs attrs
)
1899 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_READ
);
1902 data
= address_space_ldub(&address_space_memory
, addr
, attrs
, &res
);
1905 data
= address_space_lduw(&address_space_memory
, addr
, attrs
, &res
);
1908 data
= address_space_ldl(&address_space_memory
, addr
, attrs
, &res
);
1916 static MemTxResult
watch_mem_write(void *opaque
, hwaddr addr
,
1917 uint64_t val
, unsigned size
,
1922 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, attrs
, BP_MEM_WRITE
);
1925 address_space_stb(&address_space_memory
, addr
, val
, attrs
, &res
);
1928 address_space_stw(&address_space_memory
, addr
, val
, attrs
, &res
);
1931 address_space_stl(&address_space_memory
, addr
, val
, attrs
, &res
);
1938 static const MemoryRegionOps watch_mem_ops
= {
1939 .read_with_attrs
= watch_mem_read
,
1940 .write_with_attrs
= watch_mem_write
,
1941 .endianness
= DEVICE_NATIVE_ENDIAN
,
1944 static MemTxResult
subpage_read(void *opaque
, hwaddr addr
, uint64_t *data
,
1945 unsigned len
, MemTxAttrs attrs
)
1947 subpage_t
*subpage
= opaque
;
1951 #if defined(DEBUG_SUBPAGE)
1952 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
"\n", __func__
,
1953 subpage
, len
, addr
);
1955 res
= address_space_read(subpage
->as
, addr
+ subpage
->base
,
1962 *data
= ldub_p(buf
);
1965 *data
= lduw_p(buf
);
1978 static MemTxResult
subpage_write(void *opaque
, hwaddr addr
,
1979 uint64_t value
, unsigned len
, MemTxAttrs attrs
)
1981 subpage_t
*subpage
= opaque
;
1984 #if defined(DEBUG_SUBPAGE)
1985 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1986 " value %"PRIx64
"\n",
1987 __func__
, subpage
, len
, addr
, value
);
2005 return address_space_write(subpage
->as
, addr
+ subpage
->base
,
2009 static bool subpage_accepts(void *opaque
, hwaddr addr
,
2010 unsigned len
, bool is_write
)
2012 subpage_t
*subpage
= opaque
;
2013 #if defined(DEBUG_SUBPAGE)
2014 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx
"\n",
2015 __func__
, subpage
, is_write
? 'w' : 'r', len
, addr
);
2018 return address_space_access_valid(subpage
->as
, addr
+ subpage
->base
,
2022 static const MemoryRegionOps subpage_ops
= {
2023 .read_with_attrs
= subpage_read
,
2024 .write_with_attrs
= subpage_write
,
2025 .impl
.min_access_size
= 1,
2026 .impl
.max_access_size
= 8,
2027 .valid
.min_access_size
= 1,
2028 .valid
.max_access_size
= 8,
2029 .valid
.accepts
= subpage_accepts
,
2030 .endianness
= DEVICE_NATIVE_ENDIAN
,
2033 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
2038 if (start
>= TARGET_PAGE_SIZE
|| end
>= TARGET_PAGE_SIZE
)
2040 idx
= SUBPAGE_IDX(start
);
2041 eidx
= SUBPAGE_IDX(end
);
2042 #if defined(DEBUG_SUBPAGE)
2043 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2044 __func__
, mmio
, start
, end
, idx
, eidx
, section
);
2046 for (; idx
<= eidx
; idx
++) {
2047 mmio
->sub_section
[idx
] = section
;
2053 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
)
2057 mmio
= g_malloc0(sizeof(subpage_t
));
2061 memory_region_init_io(&mmio
->iomem
, NULL
, &subpage_ops
, mmio
,
2062 NULL
, TARGET_PAGE_SIZE
);
2063 mmio
->iomem
.subpage
= true;
2064 #if defined(DEBUG_SUBPAGE)
2065 printf("%s: %p base " TARGET_FMT_plx
" len %08x\n", __func__
,
2066 mmio
, base
, TARGET_PAGE_SIZE
);
2068 subpage_register(mmio
, 0, TARGET_PAGE_SIZE
-1, PHYS_SECTION_UNASSIGNED
);
2073 static uint16_t dummy_section(PhysPageMap
*map
, AddressSpace
*as
,
2077 MemoryRegionSection section
= {
2078 .address_space
= as
,
2080 .offset_within_address_space
= 0,
2081 .offset_within_region
= 0,
2082 .size
= int128_2_64(),
2085 return phys_section_add(map
, §ion
);
2088 MemoryRegion
*iotlb_to_region(CPUState
*cpu
, hwaddr index
)
2090 AddressSpaceDispatch
*d
= atomic_rcu_read(&cpu
->memory_dispatch
);
2091 MemoryRegionSection
*sections
= d
->map
.sections
;
2093 return sections
[index
& ~TARGET_PAGE_MASK
].mr
;
2096 static void io_mem_init(void)
2098 memory_region_init_io(&io_mem_rom
, NULL
, &unassigned_mem_ops
, NULL
, NULL
, UINT64_MAX
);
2099 memory_region_init_io(&io_mem_unassigned
, NULL
, &unassigned_mem_ops
, NULL
,
2101 memory_region_init_io(&io_mem_notdirty
, NULL
, ¬dirty_mem_ops
, NULL
,
2103 memory_region_init_io(&io_mem_watch
, NULL
, &watch_mem_ops
, NULL
,
2107 static void mem_begin(MemoryListener
*listener
)
2109 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2110 AddressSpaceDispatch
*d
= g_new0(AddressSpaceDispatch
, 1);
2113 n
= dummy_section(&d
->map
, as
, &io_mem_unassigned
);
2114 assert(n
== PHYS_SECTION_UNASSIGNED
);
2115 n
= dummy_section(&d
->map
, as
, &io_mem_notdirty
);
2116 assert(n
== PHYS_SECTION_NOTDIRTY
);
2117 n
= dummy_section(&d
->map
, as
, &io_mem_rom
);
2118 assert(n
== PHYS_SECTION_ROM
);
2119 n
= dummy_section(&d
->map
, as
, &io_mem_watch
);
2120 assert(n
== PHYS_SECTION_WATCH
);
2122 d
->phys_map
= (PhysPageEntry
) { .ptr
= PHYS_MAP_NODE_NIL
, .skip
= 1 };
2124 as
->next_dispatch
= d
;
2127 static void address_space_dispatch_free(AddressSpaceDispatch
*d
)
2129 phys_sections_free(&d
->map
);
2133 static void mem_commit(MemoryListener
*listener
)
2135 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2136 AddressSpaceDispatch
*cur
= as
->dispatch
;
2137 AddressSpaceDispatch
*next
= as
->next_dispatch
;
2139 phys_page_compact_all(next
, next
->map
.nodes_nb
);
2141 atomic_rcu_set(&as
->dispatch
, next
);
2143 call_rcu(cur
, address_space_dispatch_free
, rcu
);
2147 static void tcg_commit(MemoryListener
*listener
)
2151 /* since each CPU stores ram addresses in its TLB cache, we must
2152 reset the modified entries */
2155 /* FIXME: Disentangle the cpu.h circular files deps so we can
2156 directly get the right CPU from listener. */
2157 if (cpu
->tcg_as_listener
!= listener
) {
2160 cpu_reload_memory_map(cpu
);
2164 static void core_log_global_start(MemoryListener
*listener
)
2166 cpu_physical_memory_set_dirty_tracking(true);
2169 static void core_log_global_stop(MemoryListener
*listener
)
2171 cpu_physical_memory_set_dirty_tracking(false);
2174 static MemoryListener core_memory_listener
= {
2175 .log_global_start
= core_log_global_start
,
2176 .log_global_stop
= core_log_global_stop
,
2180 void address_space_init_dispatch(AddressSpace
*as
)
2182 as
->dispatch
= NULL
;
2183 as
->dispatch_listener
= (MemoryListener
) {
2185 .commit
= mem_commit
,
2186 .region_add
= mem_add
,
2187 .region_nop
= mem_add
,
2190 memory_listener_register(&as
->dispatch_listener
, as
);
2193 void address_space_unregister(AddressSpace
*as
)
2195 memory_listener_unregister(&as
->dispatch_listener
);
2198 void address_space_destroy_dispatch(AddressSpace
*as
)
2200 AddressSpaceDispatch
*d
= as
->dispatch
;
2202 atomic_rcu_set(&as
->dispatch
, NULL
);
2204 call_rcu(d
, address_space_dispatch_free
, rcu
);
2208 static void memory_map_init(void)
2210 system_memory
= g_malloc(sizeof(*system_memory
));
2212 memory_region_init(system_memory
, NULL
, "system", UINT64_MAX
);
2213 address_space_init(&address_space_memory
, system_memory
, "memory");
2215 system_io
= g_malloc(sizeof(*system_io
));
2216 memory_region_init_io(system_io
, NULL
, &unassigned_io_ops
, NULL
, "io",
2218 address_space_init(&address_space_io
, system_io
, "I/O");
2220 memory_listener_register(&core_memory_listener
, &address_space_memory
);
2223 MemoryRegion
*get_system_memory(void)
2225 return system_memory
;
2228 MemoryRegion
*get_system_io(void)
2233 #endif /* !defined(CONFIG_USER_ONLY) */
2235 /* physical memory access (slow version, mainly for debug) */
2236 #if defined(CONFIG_USER_ONLY)
2237 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
2238 uint8_t *buf
, int len
, int is_write
)
2245 page
= addr
& TARGET_PAGE_MASK
;
2246 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
2249 flags
= page_get_flags(page
);
2250 if (!(flags
& PAGE_VALID
))
2253 if (!(flags
& PAGE_WRITE
))
2255 /* XXX: this code should not depend on lock_user */
2256 if (!(p
= lock_user(VERIFY_WRITE
, addr
, l
, 0)))
2259 unlock_user(p
, addr
, l
);
2261 if (!(flags
& PAGE_READ
))
2263 /* XXX: this code should not depend on lock_user */
2264 if (!(p
= lock_user(VERIFY_READ
, addr
, l
, 1)))
2267 unlock_user(p
, addr
, 0);
2278 static void invalidate_and_set_dirty(hwaddr addr
,
2281 if (cpu_physical_memory_range_includes_clean(addr
, length
)) {
2282 tb_invalidate_phys_range(addr
, addr
+ length
, 0);
2283 cpu_physical_memory_set_dirty_range_nocode(addr
, length
);
2285 xen_modified_memory(addr
, length
);
2288 static int memory_access_size(MemoryRegion
*mr
, unsigned l
, hwaddr addr
)
2290 unsigned access_size_max
= mr
->ops
->valid
.max_access_size
;
2292 /* Regions are assumed to support 1-4 byte accesses unless
2293 otherwise specified. */
2294 if (access_size_max
== 0) {
2295 access_size_max
= 4;
2298 /* Bound the maximum access by the alignment of the address. */
2299 if (!mr
->ops
->impl
.unaligned
) {
2300 unsigned align_size_max
= addr
& -addr
;
2301 if (align_size_max
!= 0 && align_size_max
< access_size_max
) {
2302 access_size_max
= align_size_max
;
2306 /* Don't attempt accesses larger than the maximum. */
2307 if (l
> access_size_max
) {
2308 l
= access_size_max
;
2311 l
= 1 << (qemu_fls(l
) - 1);
2317 MemTxResult
address_space_rw(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2318 uint8_t *buf
, int len
, bool is_write
)
2325 MemTxResult result
= MEMTX_OK
;
2330 mr
= address_space_translate(as
, addr
, &addr1
, &l
, is_write
);
2333 if (!memory_access_is_direct(mr
, is_write
)) {
2334 l
= memory_access_size(mr
, l
, addr1
);
2335 /* XXX: could force current_cpu to NULL to avoid
2339 /* 64 bit write access */
2341 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 8,
2345 /* 32 bit write access */
2347 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 4,
2351 /* 16 bit write access */
2353 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 2,
2357 /* 8 bit write access */
2359 result
|= memory_region_dispatch_write(mr
, addr1
, val
, 1,
2366 addr1
+= memory_region_get_ram_addr(mr
);
2368 ptr
= qemu_get_ram_ptr(addr1
);
2369 memcpy(ptr
, buf
, l
);
2370 invalidate_and_set_dirty(addr1
, l
);
2373 if (!memory_access_is_direct(mr
, is_write
)) {
2375 l
= memory_access_size(mr
, l
, addr1
);
2378 /* 64 bit read access */
2379 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 8,
2384 /* 32 bit read access */
2385 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 4,
2390 /* 16 bit read access */
2391 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 2,
2396 /* 8 bit read access */
2397 result
|= memory_region_dispatch_read(mr
, addr1
, &val
, 1,
2406 ptr
= qemu_get_ram_ptr(mr
->ram_addr
+ addr1
);
2407 memcpy(buf
, ptr
, l
);
2419 MemTxResult
address_space_write(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2420 const uint8_t *buf
, int len
)
2422 return address_space_rw(as
, addr
, attrs
, (uint8_t *)buf
, len
, true);
2425 MemTxResult
address_space_read(AddressSpace
*as
, hwaddr addr
, MemTxAttrs attrs
,
2426 uint8_t *buf
, int len
)
2428 return address_space_rw(as
, addr
, attrs
, buf
, len
, false);
2432 void cpu_physical_memory_rw(hwaddr addr
, uint8_t *buf
,
2433 int len
, int is_write
)
2435 address_space_rw(&address_space_memory
, addr
, MEMTXATTRS_UNSPECIFIED
,
2436 buf
, len
, is_write
);
2439 enum write_rom_type
{
2444 static inline void cpu_physical_memory_write_rom_internal(AddressSpace
*as
,
2445 hwaddr addr
, const uint8_t *buf
, int len
, enum write_rom_type type
)
2455 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
2457 if (!(memory_region_is_ram(mr
) ||
2458 memory_region_is_romd(mr
))) {
2461 addr1
+= memory_region_get_ram_addr(mr
);
2463 ptr
= qemu_get_ram_ptr(addr1
);
2466 memcpy(ptr
, buf
, l
);
2467 invalidate_and_set_dirty(addr1
, l
);
2470 flush_icache_range((uintptr_t)ptr
, (uintptr_t)ptr
+ l
);
2481 /* used for ROM loading : can write in RAM and ROM */
2482 void cpu_physical_memory_write_rom(AddressSpace
*as
, hwaddr addr
,
2483 const uint8_t *buf
, int len
)
2485 cpu_physical_memory_write_rom_internal(as
, addr
, buf
, len
, WRITE_DATA
);
2488 void cpu_flush_icache_range(hwaddr start
, int len
)
2491 * This function should do the same thing as an icache flush that was
2492 * triggered from within the guest. For TCG we are always cache coherent,
2493 * so there is no need to flush anything. For KVM / Xen we need to flush
2494 * the host's instruction cache at least.
2496 if (tcg_enabled()) {
2500 cpu_physical_memory_write_rom_internal(&address_space_memory
,
2501 start
, NULL
, len
, FLUSH_CACHE
);
2512 static BounceBuffer bounce
;
2514 typedef struct MapClient
{
2516 QLIST_ENTRY(MapClient
) link
;
2519 QemuMutex map_client_list_lock
;
2520 static QLIST_HEAD(map_client_list
, MapClient
) map_client_list
2521 = QLIST_HEAD_INITIALIZER(map_client_list
);
2523 static void cpu_unregister_map_client_do(MapClient
*client
)
2525 QLIST_REMOVE(client
, link
);
2529 static void cpu_notify_map_clients_locked(void)
2533 while (!QLIST_EMPTY(&map_client_list
)) {
2534 client
= QLIST_FIRST(&map_client_list
);
2535 qemu_bh_schedule(client
->bh
);
2536 cpu_unregister_map_client_do(client
);
2540 void cpu_register_map_client(QEMUBH
*bh
)
2542 MapClient
*client
= g_malloc(sizeof(*client
));
2544 qemu_mutex_lock(&map_client_list_lock
);
2546 QLIST_INSERT_HEAD(&map_client_list
, client
, link
);
2547 if (!atomic_read(&bounce
.in_use
)) {
2548 cpu_notify_map_clients_locked();
2550 qemu_mutex_unlock(&map_client_list_lock
);
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
    qemu_mutex_init(&map_client_list_lock);
}
void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}
static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}
bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}
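
/* Illustrative sketch (not part of this file's code): because
 * address_space_map() may return NULL while the single bounce buffer is in
 * use, callers that can defer their work typically register a bottom half
 * and retry when notified. retry_bh below is a hypothetical QEMUBH created
 * by the caller.
 *
 *     void *p = address_space_map(as, addr, &plen, is_write);
 *     if (!p) {
 *         cpu_register_map_client(retry_bh);   /+ rerun the request later +/
 *         return;
 *     }
 */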
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1. access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}
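
/* Illustrative sketch (not part of this file's code): the usual DMA-style
 * pattern is map, access the host pointer directly, then unmap with the
 * number of bytes actually touched so dirty tracking stays accurate.
 * guest_addr and the length are made-up example values.
 *
 *     hwaddr plen = 4096;
 *     void *host = address_space_map(as, guest_addr, &plen, true);
 *     if (host) {
 *         memset(host, 0, plen);                    /+ device writes RAM +/
 *         address_space_unmap(as, host, plen, true, plen);
 *     }
 */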
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len,
                               is_write, access_len);
}
/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;

    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    return val;
}
uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}
uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
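
/* Illustrative sketch (not part of this file's code): ldl_phys() and friends
 * are convenience wrappers for callers that do not care about MemTxAttrs or
 * the transaction result; the address must be 4-byte aligned as noted above.
 * 0x1000 is a made-up example address.
 *
 *     uint32_t v = ldl_le_phys(&address_space_memory, 0x1000);
 */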
/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    return val;
}
uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}
uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    return val;
}
uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}
uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}
/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (cpu_physical_memory_is_clean(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
            }
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
}
void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
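
/* Illustrative sketch (not part of this file's code): target MMU helpers
 * that set accessed/dirty flags inside a guest page table entry can use the
 * _notdirty variant so the page holding the PTE is not itself flagged dirty.
 * pte_addr, cs and PG_ACCESSED_MASK stand in for whatever the target code
 * actually uses; they are example names only.
 *
 *     uint32_t pte = ldl_phys(cs->as, pte_addr);
 *     stl_phys_notdirty(cs->as, pte_addr, pte | PG_ACCESSED_MASK);
 */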
/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
}
void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}
void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;

    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
}
void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}
void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}
void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1) {
            return -1;
        }
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif
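
/* Illustrative sketch (not part of this file's code): the gdbstub reads
 * guest virtual memory through this helper; a monitor-style caller might do
 * the same. vaddr and the buffer size are made-up example values.
 *
 *     uint8_t window[64];
 *     if (cpu_memory_rw_debug(cpu, vaddr, window, sizeof(window), 0) < 0) {
 *         /+ no physical page mapped at vaddr +/
 *     }
 */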
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}
void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->used_length, opaque);