2 * QEMU PAPR Storage Class Memory Interfaces
4 * Copyright (c) 2019-2020, IBM Corporation.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "qemu/osdep.h"
25 #include "qemu/cutils.h"
26 #include "qapi/error.h"
27 #include "hw/ppc/spapr_drc.h"
28 #include "hw/ppc/spapr_nvdimm.h"
29 #include "hw/mem/nvdimm.h"
30 #include "qemu/nvdimm-utils.h"
31 #include "hw/ppc/fdt.h"
32 #include "qemu/range.h"
33 #include "hw/ppc/spapr_numa.h"
34 #include "block/thread-pool.h"
35 #include "migration/vmstate.h"
36 #include "qemu/pmem.h"
37 #include "hw/qdev-properties.h"
39 /* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */
40 /* SCM device is unable to persist memory contents */
/* Health-bitmap flag; h_scm_health() sets it when NVDIMM_UNARMED_PROP reads true. */
41 #define PAPR_PMEM_UNARMED PPC_BIT(0)
44 * The nvdimm size should be aligned to SCM block size.
45 * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
46 * in order to have SCM regions not to overlap with dimm memory regions.
47 * The SCM devices can have variable block sizes. For now, fixing the
48 * block size to the minimum value.
/* The SCM block size is defined as exactly SPAPR_MEMORY_BLOCK_SIZE. */
50 #define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE
52 /* Have an explicit check for alignment */
/*
 * The expression handed to the build-time check is the remainder of the
 * SCM block size divided by SPAPR_MEMORY_BLOCK_SIZE.
 */
53 QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE
% SPAPR_MEMORY_BLOCK_SIZE
);
/* Type name used with object_dynamic_cast() to recognise spapr-nvdimm devices. */
55 #define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
/* Declares the instance type, class type and SPAPR_NVDIMM cast-macro prefix. */
56 OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice
, SPAPRNVDIMMClass
, SPAPR_NVDIMM
)
/*
 * Class structure for the spapr-nvdimm type. It embeds NVDIMMClass as its
 * parent and declares two function pointers, realize and unrealize.
 *
 * NOTE(review): the unrealize pointer declared here takes (dimm, errp),
 * while spapr_nvdimm_unrealize() in this file takes only (dimm), and
 * spapr_nvdimm_class_init() assigns the hooks through an NVDIMMClass
 * pointer rather than through these members. Confirm whether these two
 * members are used at all.
 */
58 struct SPAPRNVDIMMClass
{
60 NVDIMMClass parent_class
;
63 void (*realize
)(NVDIMMDevice
*dimm
, Error
**errp
);
64 void (*unrealize
)(NVDIMMDevice
*dimm
, Error
**errp
);
/*
 * Check that an NVDIMM device is acceptable for a PAPR machine.
 *
 * hotplug_dev: hotplug handler, also used as the machine object
 * nvdimm:      device being validated
 * size:        device size; the error text describes it as excluding the label
 * errp:        receives a description of the first failed check
 *
 * Visible checks, in order: machine class supports NVDIMM, NVDIMM is enabled
 * in the machine state, the label-size property, size is a multiple of
 * SPAPR_MINIMUM_SCM_BLOCK_SIZE, the uuid is not null, and a spapr-nvdimm
 * device has a memory region with a valid file descriptor.
 * The return statements are not visible in this excerpt.
 */
67 bool spapr_nvdimm_validate(HotplugHandler
*hotplug_dev
, NVDIMMDevice
*nvdimm
,
68 uint64_t size
, Error
**errp
)
70 const MachineClass
*mc
= MACHINE_GET_CLASS(hotplug_dev
);
71 const MachineState
*ms
= MACHINE(hotplug_dev
);
72 PCDIMMDevice
*dimm
= PC_DIMM(nvdimm
);
73 MemoryRegion
*mr
= host_memory_backend_get_memory(dimm
->hostmem
);
74 g_autofree
char *uuidstr
= NULL
;
/* The machine class must advertise NVDIMM support. */
78 if (!mc
->nvdimm_supported
) {
79 error_setg(errp
, "NVDIMM hotplug not supported for this machine");
/* NVDIMM must be enabled in the machine's nvdimms_state. */
83 if (!ms
->nvdimms_state
->is_enabled
) {
84 error_setg(errp
, "nvdimm device found but 'nvdimm=off' was set");
/* A label size is mandatory; the comparison itself is not visible here. */
88 if (object_property_get_int(OBJECT(nvdimm
), NVDIMM_LABEL_SIZE_PROP
,
90 error_setg(errp
, "PAPR requires NVDIMM devices to have label-size set");
/* The size has to be a whole number of SCM blocks. */
94 if (size
% SPAPR_MINIMUM_SCM_BLOCK_SIZE
) {
95 error_setg(errp
, "PAPR requires NVDIMM memory size (excluding label)"
96 " to be a multiple of %" PRIu64
"MB",
97 SPAPR_MINIMUM_SCM_BLOCK_SIZE
/ MiB
);
/* Fetch the uuid property as a string, parse it and reject a null uuid. */
101 uuidstr
= object_property_get_str(OBJECT(nvdimm
), NVDIMM_UUID_PROP
,
103 ret
= qemu_uuid_parse(uuidstr
, &uuid
);
106 if (qemu_uuid_is_null(&uuid
)) {
107 error_setg(errp
, "NVDIMM device requires the uuid to be set");
/* A spapr-nvdimm device needs a memory region that has a file descriptor. */
111 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
) &&
112 (memory_region_get_fd(mr
) < 0)) {
113 error_setg(errp
, "spapr-nvdimm device requires the "
114 "memdev %s to be of memory-backend-file type",
115 object_get_canonical_path_component(OBJECT(dimm
->hostmem
)));
/*
 * Attach an NVDIMM device to the PMEM DRC for the given slot.
 *
 * dev:  device being added
 * slot: slot number used as the DRC id
 *
 * The DRC is looked up by type TYPE_SPAPR_DRC_PMEM and slot, and the device
 * is attached to it. A hotplug-add request by DRC index is also issued;
 * presumably only when 'hotplugged' is true, but that condition is not
 * visible in this excerpt.
 */
123 void spapr_add_nvdimm(DeviceState
*dev
, uint64_t slot
)
126 bool hotplugged
= spapr_drc_hotplugged(dev
);
128 drc
= spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM
, slot
);
132 * pc_dimm_get_free_slot() provided a free slot at pre-plug. The
133 * corresponding DRC is thus assumed to be attachable.
135 spapr_drc_attach(drc
, dev
);
138 spapr_hotplug_req_add_by_index(drc
);
/*
 * Create the "ibm,pmemory@<drc index>" device-tree node for one NVDIMM.
 *
 * spapr:         machine state, passed on when writing NUMA associativity
 * fdt:           device tree blob being built
 * parent_offset: offset of the node under which the child is added
 * nvdimm:        device to describe
 *
 * The node receives reg, compatible, device_type, associativity,
 * ibm,unit-guid, ibm,my-drc-index, ibm,block-size, ibm,number-of-blocks,
 * ibm,metadata-size, ibm,pmem-application and ibm,cache-flush-required.
 * For spapr-nvdimm devices, ibm,hcall-flush-required is added when the
 * backend is not pmem or pmem-override is set.
 * The return statement is not visible in this excerpt; callers use the
 * result as a node offset.
 */
142 static int spapr_dt_nvdimm(SpaprMachineState
*spapr
, void *fdt
,
143 int parent_offset
, NVDIMMDevice
*nvdimm
)
149 uint32_t node
= object_property_get_uint(OBJECT(nvdimm
), PC_DIMM_NODE_PROP
,
151 uint64_t slot
= object_property_get_uint(OBJECT(nvdimm
), PC_DIMM_SLOT_PROP
,
153 uint64_t lsize
= nvdimm
->label_size
;
154 uint64_t size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
/* The DRC index is used both in the node name and as its reg value. */
157 drc
= spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM
, slot
);
160 drc_idx
= spapr_drc_index(drc
);
162 buf
= g_strdup_printf("ibm,pmemory@%x", drc_idx
);
163 child_offset
= fdt_add_subnode(fdt
, parent_offset
, buf
);
168 _FDT((fdt_setprop_cell(fdt
, child_offset
, "reg", drc_idx
)));
169 _FDT((fdt_setprop_string(fdt
, child_offset
, "compatible", "ibm,pmemory")));
170 _FDT((fdt_setprop_string(fdt
, child_offset
, "device_type", "ibm,pmemory")));
172 spapr_numa_write_associativity_dt(spapr
, fdt
, child_offset
, node
);
/* buf is reused here to hold the uuid string written as ibm,unit-guid. */
174 buf
= qemu_uuid_unparse_strdup(&nvdimm
->uuid
);
175 _FDT((fdt_setprop_string(fdt
, child_offset
, "ibm,unit-guid", buf
)));
178 _FDT((fdt_setprop_cell(fdt
, child_offset
, "ibm,my-drc-index", drc_idx
)));
180 _FDT((fdt_setprop_u64(fdt
, child_offset
, "ibm,block-size",
181 SPAPR_MINIMUM_SCM_BLOCK_SIZE
)));
182 _FDT((fdt_setprop_u64(fdt
, child_offset
, "ibm,number-of-blocks",
183 size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
)));
/*
 * NOTE(review): lsize is a uint64_t but is written with fdt_setprop_cell,
 * whereas the other 64-bit values above use fdt_setprop_u64. Confirm the
 * label size can never exceed what a cell can hold.
 */
184 _FDT((fdt_setprop_cell(fdt
, child_offset
, "ibm,metadata-size", lsize
)));
186 _FDT((fdt_setprop_string(fdt
, child_offset
, "ibm,pmem-application",
187 "operating-system")));
188 _FDT(fdt_setprop(fdt
, child_offset
, "ibm,cache-flush-required", NULL
, 0));
/* Only spapr-nvdimm devices can advertise the hcall-based flush. */
190 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
)) {
191 bool is_pmem
= false, pmem_override
= false;
192 PCDIMMDevice
*dimm
= PC_DIMM(nvdimm
);
193 HostMemoryBackend
*hostmem
= dimm
->hostmem
;
195 is_pmem
= object_property_get_bool(OBJECT(hostmem
), "pmem", NULL
);
196 pmem_override
= object_property_get_bool(OBJECT(nvdimm
),
197 "pmem-override", NULL
);
198 if (!is_pmem
|| pmem_override
) {
199 _FDT(fdt_setprop(fdt
, child_offset
, "ibm,hcall-flush-required",
/*
 * Build the device-tree node for the NVDIMM attached to a DRC.
 *
 * drc:              connector whose dev is the NVDIMM
 * spapr:            machine state
 * fdt:              device tree blob being built
 * fdt_start_offset: out; receives the value returned by spapr_dt_nvdimm()
 * errp:             not referenced in the visible code
 *
 * The node is created with a parent offset of 0. The return statement is
 * not visible in this excerpt.
 */
207 int spapr_pmem_dt_populate(SpaprDrc
*drc
, SpaprMachineState
*spapr
,
208 void *fdt
, int *fdt_start_offset
, Error
**errp
)
210 NVDIMMDevice
*nvdimm
= NVDIMM(drc
->dev
);
212 *fdt_start_offset
= spapr_dt_nvdimm(spapr
, fdt
, 0, nvdimm
);
/*
 * Populate the "ibm,persistent-memory" node and add one child node per
 * NVDIMM returned by nvdimm_get_device_list().
 *
 * spapr: machine state, passed through to spapr_dt_nvdimm()
 * fdt:   device tree blob being built
 *
 * The node is looked up first; the visible code also adds it and sets
 * #address-cells, #size-cells and device_type. Presumably that happens only
 * when the lookup fails, but the condition is not visible in this excerpt.
 * The device list is freed with g_slist_free() once the loop is done.
 */
217 void spapr_dt_persistent_memory(SpaprMachineState
*spapr
, void *fdt
)
219 int offset
= fdt_subnode_offset(fdt
, 0, "ibm,persistent-memory");
220 GSList
*iter
, *nvdimms
= nvdimm_get_device_list();
223 offset
= fdt_add_subnode(fdt
, 0, "ibm,persistent-memory");
225 _FDT((fdt_setprop_cell(fdt
, offset
, "#address-cells", 0x1)));
226 _FDT((fdt_setprop_cell(fdt
, offset
, "#size-cells", 0x0)));
227 _FDT((fdt_setprop_string(fdt
, offset
, "device_type",
228 "ibm,persistent-memory")));
231 /* Create DT entries for cold plugged NVDIMM devices */
232 for (iter
= nvdimms
; iter
; iter
= iter
->next
) {
233 NVDIMMDevice
*nvdimm
= iter
->data
;
235 spapr_dt_nvdimm(spapr
, fdt
, offset
, nvdimm
);
237 g_slist_free(nvdimms
);
/*
 * Hypercall handler registered for H_SCM_READ_METADATA.
 *
 * Inputs taken from args: [0] DRC index, [1] offset into the label area,
 * [2] number of bytes to read.
 *
 * The DRC must exist, have a device attached and be of PMEM type. The length
 * must be 1, 2, 4 or 8. offset + len must neither wrap around nor exceed the
 * device's label_size. The bytes are read into an 8-byte buffer through the
 * NVDIMM class's read_label_data hook and decoded with the lduw_be_p,
 * ldl_be_p or ldq_be_p loader depending on the length.
 * The return codes and the store of the result are not visible in this
 * excerpt.
 */
242 static target_ulong
h_scm_read_metadata(PowerPCCPU
*cpu
,
243 SpaprMachineState
*spapr
,
247 uint32_t drc_index
= args
[0];
248 uint64_t offset
= args
[1];
249 uint64_t len
= args
[2];
250 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
251 NVDIMMDevice
*nvdimm
;
254 uint8_t buf
[8] = { 0 };
256 if (!drc
|| !drc
->dev
||
257 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
/* Only power-of-two access sizes up to the buffer size are accepted. */
261 if (len
!= 1 && len
!= 2 &&
262 len
!= 4 && len
!= 8) {
266 nvdimm
= NVDIMM(drc
->dev
);
/* The first test catches unsigned wrap-around of offset + len. */
267 if ((offset
+ len
< offset
) ||
268 (nvdimm
->label_size
< len
+ offset
)) {
272 ddc
= NVDIMM_GET_CLASS(nvdimm
);
273 ddc
->read_label_data(nvdimm
, buf
, len
, offset
);
280 data
= lduw_be_p(buf
);
283 data
= ldl_be_p(buf
);
286 data
= ldq_be_p(buf
);
289 g_assert_not_reached();
/*
 * Hypercall handler registered for H_SCM_WRITE_METADATA.
 *
 * Inputs taken from args: [0] DRC index, [1] offset into the label area,
 * [2] data to write, [3] number of bytes to write.
 *
 * The DRC must exist, have a device attached and be of PMEM type. The length
 * must be 1, 2, 4 or 8, and offset + len must neither wrap around nor exceed
 * label_size (a further condition in that test is not visible here). The
 * data value is checked against masks for bits above 8, 16 and 32 bits; the
 * mask tests presumably correspond to lengths 1, 2 and 4, but the enclosing
 * switch is not visible. The buffer is then written through the NVDIMM
 * class's write_label_data hook.
 * The return codes are not visible in this excerpt.
 */
297 static target_ulong
h_scm_write_metadata(PowerPCCPU
*cpu
,
298 SpaprMachineState
*spapr
,
302 uint32_t drc_index
= args
[0];
303 uint64_t offset
= args
[1];
304 uint64_t data
= args
[2];
305 uint64_t len
= args
[3];
306 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
307 NVDIMMDevice
*nvdimm
;
309 uint8_t buf
[8] = { 0 };
311 if (!drc
|| !drc
->dev
||
312 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
316 if (len
!= 1 && len
!= 2 &&
317 len
!= 4 && len
!= 8) {
321 nvdimm
= NVDIMM(drc
->dev
);
/* The first test catches unsigned wrap-around of offset + len. */
322 if ((offset
+ len
< offset
) ||
323 (nvdimm
->label_size
< len
+ offset
) ||
/* Each mask tests for bits set above the low 8, 16 or 32 bits of data. */
330 if (data
& 0xffffffffffffff00) {
336 if (data
& 0xffffffffffff0000) {
342 if (data
& 0xffffffff00000000) {
351 g_assert_not_reached();
354 ddc
= NVDIMM_GET_CLASS(nvdimm
);
355 ddc
->write_label_data(nvdimm
, buf
, len
, offset
);
/*
 * Hypercall handler registered for H_SCM_BIND_MEM.
 *
 * Inputs taken from args: [0] DRC index, [1] starting block index,
 * [2] number of SCM blocks to bind, [3] target logical memory address,
 * [4] continue token.
 *
 * The DRC must exist, have a device attached and be of PMEM type. The
 * continue token is tested for being greater than zero and the target
 * address for differing from all-ones. The requested block range is checked
 * against the device size divided by SPAPR_MINIMUM_SCM_BLOCK_SIZE, including
 * a wrap-around test on the sum. The bound address is computed as the
 * device's address property plus starting_idx blocks, and the block count is
 * written back to args[2].
 * The return codes and the store of the address are not visible in this
 * excerpt.
 */
360 static target_ulong
h_scm_bind_mem(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
361 target_ulong opcode
, target_ulong
*args
)
363 uint32_t drc_index
= args
[0];
364 uint64_t starting_idx
= args
[1];
365 uint64_t no_of_scm_blocks_to_bind
= args
[2];
366 uint64_t target_logical_mem_addr
= args
[3];
367 uint64_t continue_token
= args
[4];
369 uint64_t total_no_of_scm_blocks
;
370 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
372 NVDIMMDevice
*nvdimm
;
374 if (!drc
|| !drc
->dev
||
375 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
380 * Currently continue token should be zero qemu has already bound
381 * everything and this hcall doesn't return H_BUSY.
383 if (continue_token
> 0) {
387 /* Currently qemu assigns the address. */
388 if (target_logical_mem_addr
!= 0xffffffffffffffff) {
392 nvdimm
= NVDIMM(drc
->dev
);
394 size
= object_property_get_uint(OBJECT(nvdimm
),
395 PC_DIMM_SIZE_PROP
, &error_abort
);
397 total_no_of_scm_blocks
= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
399 if (starting_idx
> total_no_of_scm_blocks
) {
/* The first test catches unsigned wrap-around of the block range end. */
403 if (((starting_idx
+ no_of_scm_blocks_to_bind
) < starting_idx
) ||
404 ((starting_idx
+ no_of_scm_blocks_to_bind
) > total_no_of_scm_blocks
)) {
408 addr
= object_property_get_uint(OBJECT(nvdimm
),
409 PC_DIMM_ADDR_PROP
, &error_abort
);
411 addr
+= starting_idx
* SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
413 /* Already bound, Return target logical address in R5 */
415 args
[2] = no_of_scm_blocks_to_bind
;
/*
 * Per-request state for one asynchronous flush.
 *
 * continue_token identifies the request and is compared against the token
 * supplied to spapr_nvdimm_get_flush_status(). 'node' links the state into
 * either the pending or the completed list of a SpaprNVDIMMDevice.
 * Other code in this file also accesses hcall_ret and drcidx members, whose
 * declarations are not visible in this excerpt.
 */
420 typedef struct SpaprNVDIMMDeviceFlushState
{
421 uint64_t continue_token
;
425 QLIST_ENTRY(SpaprNVDIMMDeviceFlushState
) node
;
426 } SpaprNVDIMMDeviceFlushState
;
/*
 * Instance structure for the spapr-nvdimm device type.
 *
 * parent_obj:                    embedded NVDIMMDevice
 * hcall_flush_required:          set in spapr_nvdimm_realize() when the
 *                                backend is not pmem or pmem-override is on
 * nvdimm_flush_token:            counter incremented for each new flush state
 * pending_nvdimm_flush_states:   flush requests submitted but not completed
 * completed_nvdimm_flush_states: flush requests whose result awaits pickup
 */
428 typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice
;
429 struct SpaprNVDIMMDevice
{
431 NVDIMMDevice parent_obj
;
433 bool hcall_flush_required
;
434 uint64_t nvdimm_flush_token
;
435 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState
) pending_nvdimm_flush_states
;
436 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState
) completed_nvdimm_flush_states
;
441 * The 'on' value for this property forces QEMU to enable the hcall
442 * flush for the nvdimm device even if the backend is pmem.
/*
 * Worker submitted through thread_pool_submit_aio() to flush one NVDIMM.
 *
 * opaque: the SpaprNVDIMMDeviceFlushState of the request
 *
 * The DRC is looked up from state->drcidx and asserted to be non-NULL. When
 * the backend's "pmem" property is true, the RAM pointer and size of the
 * DIMM are handed to pmem_persist(). The visible code also calls
 * qemu_fdatasync() on the backend's file descriptor and reports an error on
 * failure; presumably that is the non-pmem branch, but the else and the
 * return values are not visible in this excerpt.
 */
447 static int flush_worker_cb(void *opaque
)
449 SpaprNVDIMMDeviceFlushState
*state
= opaque
;
450 SpaprDrc
*drc
= spapr_drc_by_index(state
->drcidx
);
452 HostMemoryBackend
*backend
;
455 g_assert(drc
!= NULL
);
457 dimm
= PC_DIMM(drc
->dev
);
458 backend
= MEMORY_BACKEND(dimm
->hostmem
);
459 backend_fd
= memory_region_get_fd(&backend
->mr
);
461 if (object_property_get_bool(OBJECT(backend
), "pmem", NULL
)) {
462 MemoryRegion
*mr
= host_memory_backend_get_memory(dimm
->hostmem
);
463 void *ptr
= memory_region_get_ram_ptr(mr
);
464 size_t size
= object_property_get_uint(OBJECT(dimm
), PC_DIMM_SIZE_PROP
,
467 /* flush pmem backend */
468 pmem_persist(ptr
, size
);
470 /* flush raw backing image */
471 if (qemu_fdatasync(backend_fd
) < 0) {
472 error_report("papr_scm: Could not sync nvdimm to backend file: %s",
/*
 * Completion callback paired with flush_worker_cb().
 *
 * opaque:    the SpaprNVDIMMDeviceFlushState of the finished request
 * hcall_ret: result to record for the request
 *
 * Stores the result in the state, removes the state from the list it is
 * currently on and inserts it at the head of the owning device's
 * completed list. The DRC looked up from state->drcidx is asserted to be
 * non-NULL.
 */
481 static void spapr_nvdimm_flush_completion_cb(void *opaque
, int hcall_ret
)
483 SpaprNVDIMMDeviceFlushState
*state
= opaque
;
484 SpaprDrc
*drc
= spapr_drc_by_index(state
->drcidx
);
485 SpaprNVDIMMDevice
*s_nvdimm
;
487 g_assert(drc
!= NULL
);
489 s_nvdimm
= SPAPR_NVDIMM(drc
->dev
);
491 state
->hcall_ret
= hcall_ret
;
/* Move the state from its current list to the completed list. */
492 QLIST_REMOVE(state
, node
);
493 QLIST_INSERT_HEAD(&s_nvdimm
->completed_nvdimm_flush_states
, state
, node
);
/*
 * post_load hook of vmstate_spapr_nvdimm_states.
 *
 * opaque:     the SpaprNVDIMMDevice being loaded
 * version_id: not referenced in the visible code
 *
 * Recomputes whether the hcall flush is needed on this side (pmem-override
 * set, or backend not pmem) and compares it with the loaded
 * hcall_flush_required value; a mismatch in either direction is reported
 * with error_report(). Every state on the pending list is then resubmitted
 * to the thread pool.
 * The return statements are not visible in this excerpt.
 */
496 static int spapr_nvdimm_flush_post_load(void *opaque
, int version_id
)
498 SpaprNVDIMMDevice
*s_nvdimm
= (SpaprNVDIMMDevice
*)opaque
;
499 SpaprNVDIMMDeviceFlushState
*state
;
500 HostMemoryBackend
*backend
= MEMORY_BACKEND(PC_DIMM(s_nvdimm
)->hostmem
);
501 bool is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
502 bool pmem_override
= object_property_get_bool(OBJECT(s_nvdimm
),
503 "pmem-override", NULL
);
504 bool dest_hcall_flush_required
= pmem_override
|| !is_pmem
;
/* Loaded state did not need the hcall flush, but this side does. */
506 if (!s_nvdimm
->hcall_flush_required
&& dest_hcall_flush_required
) {
507 error_report("The file backend for the spapr-nvdimm device %s at "
508 "source is a pmem, use pmem=on and pmem-override=off to "
509 "continue.", DEVICE(s_nvdimm
)->id
);
/*
 * Loaded state needs the hcall flush, but this side would not provide it.
 * NOTE(review): the message below spells the option "pmem_override", while
 * the property read above is named "pmem-override". Confirm which spelling
 * users should be told.
 */
512 if (s_nvdimm
->hcall_flush_required
&& !dest_hcall_flush_required
) {
513 error_report("The guest expects hcall-flush support for the "
514 "spapr-nvdimm device %s, use pmem_override=on to "
515 "continue.", DEVICE(s_nvdimm
)->id
);
/* Restart every flush that was still pending when the state was saved. */
519 QLIST_FOREACH(state
, &s_nvdimm
->pending_nvdimm_flush_states
, node
) {
520 thread_pool_submit_aio(flush_worker_cb
, state
,
521 spapr_nvdimm_flush_completion_cb
, state
);
/*
 * Migration description of one SpaprNVDIMMDeviceFlushState: the
 * continue_token, hcall_ret and drcidx fields are transferred.
 */
527 static const VMStateDescription vmstate_spapr_nvdimm_flush_state
= {
528 .name
= "spapr_nvdimm_flush_state",
530 .minimum_version_id
= 1,
531 .fields
= (const VMStateField
[]) {
532 VMSTATE_UINT64(continue_token
, SpaprNVDIMMDeviceFlushState
),
533 VMSTATE_INT64(hcall_ret
, SpaprNVDIMMDeviceFlushState
),
534 VMSTATE_UINT32(drcidx
, SpaprNVDIMMDeviceFlushState
),
535 VMSTATE_END_OF_LIST()
/*
 * Migration description of a SpaprNVDIMMDevice: the hcall_flush_required
 * flag, the flush token counter, and both flush-state lists (each element
 * described by vmstate_spapr_nvdimm_flush_state). After loading,
 * spapr_nvdimm_flush_post_load() runs.
 */
539 const VMStateDescription vmstate_spapr_nvdimm_states
= {
540 .name
= "spapr_nvdimm_states",
542 .minimum_version_id
= 1,
543 .post_load
= spapr_nvdimm_flush_post_load
,
544 .fields
= (const VMStateField
[]) {
545 VMSTATE_BOOL(hcall_flush_required
, SpaprNVDIMMDevice
),
546 VMSTATE_UINT64(nvdimm_flush_token
, SpaprNVDIMMDevice
),
547 VMSTATE_QLIST_V(completed_nvdimm_flush_states
, SpaprNVDIMMDevice
, 1,
548 vmstate_spapr_nvdimm_flush_state
,
549 SpaprNVDIMMDeviceFlushState
, node
),
550 VMSTATE_QLIST_V(pending_nvdimm_flush_states
, SpaprNVDIMMDevice
, 1,
551 vmstate_spapr_nvdimm_flush_state
,
552 SpaprNVDIMMDeviceFlushState
, node
),
553 VMSTATE_END_OF_LIST()
558 * Assign a token and reserve it for the new flush state.
/*
 * Allocate a zeroed flush state, give it the next token of the device and
 * put it at the head of the device's pending list.
 *
 * spapr_nvdimm: device that owns the new state
 *
 * The device's token counter is incremented first and asserted to be
 * non-zero afterwards. The return statement is not visible in this excerpt;
 * the caller uses the result as the new state.
 */
560 static SpaprNVDIMMDeviceFlushState
*spapr_nvdimm_init_new_flush_state(
561 SpaprNVDIMMDevice
*spapr_nvdimm
)
563 SpaprNVDIMMDeviceFlushState
*state
;
565 state
= g_malloc0(sizeof(*state
));
567 spapr_nvdimm
->nvdimm_flush_token
++;
568 /* Token zero is presumed as no job pending. Assert on overflow to zero */
569 g_assert(spapr_nvdimm
->nvdimm_flush_token
!= 0);
571 state
->continue_token
= spapr_nvdimm
->nvdimm_flush_token
;
573 QLIST_INSERT_HEAD(&spapr_nvdimm
->pending_nvdimm_flush_states
, state
, node
);
579 * spapr_nvdimm_finish_flushes
580 * Waits for all pending flush requests to complete
581 * their execution and frees the states.
/*
 * Drain the flush queues of every spapr-nvdimm device.
 *
 * For each device returned by nvdimm_get_device_list() that is a
 * spapr-nvdimm, aio_poll() is called on the main AIO context in blocking
 * mode until the pending list is empty; entries of the completed list are
 * then removed. The device list is released with g_slist_free().
 * Whether the removed states are freed is not visible in this excerpt.
 */
583 void spapr_nvdimm_finish_flushes(void)
585 SpaprNVDIMMDeviceFlushState
*state
, *next
;
586 GSList
*list
, *nvdimms
;
589 * Called on reset path, the main loop thread which calls
590 * the pending BHs has gotten out running in the reset path,
591 * finally reaching here. Other code path being guest
592 * h_client_architecture_support, that's early boot up.
594 nvdimms
= nvdimm_get_device_list();
595 for (list
= nvdimms
; list
; list
= list
->next
) {
596 NVDIMMDevice
*nvdimm
= list
->data
;
597 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
)) {
598 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(nvdimm
);
/* Completion callbacks move states off the pending list while polling. */
599 while (!QLIST_EMPTY(&s_nvdimm
->pending_nvdimm_flush_states
)) {
600 aio_poll(qemu_get_aio_context(), true);
603 QLIST_FOREACH_SAFE(state
, &s_nvdimm
->completed_nvdimm_flush_states
,
605 QLIST_REMOVE(state
, node
);
610 g_slist_free(nvdimms
);
614 * spapr_nvdimm_get_flush_status
615 * Fetches the status of the hcall worker and returns
616 * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running.
/*
 * Look up the flush request identified by a token.
 *
 * s_nvdimm: device whose lists are searched
 * token:    continue token of the request (its declaration is not visible
 *           in this excerpt)
 *
 * A match on the pending list yields H_LONG_BUSY_ORDER_10_MSEC. A match on
 * the completed list reads the stored hcall_ret and removes the state from
 * the list. The remaining return statements, including the one for an
 * unknown token, are not visible in this excerpt.
 */
618 static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice
*s_nvdimm
,
621 SpaprNVDIMMDeviceFlushState
*state
, *node
;
623 QLIST_FOREACH(state
, &s_nvdimm
->pending_nvdimm_flush_states
, node
) {
624 if (state
->continue_token
== token
) {
625 return H_LONG_BUSY_ORDER_10_MSEC
;
629 QLIST_FOREACH_SAFE(state
, &s_nvdimm
->completed_nvdimm_flush_states
,
631 if (state
->continue_token
== token
) {
632 int ret
= state
->hcall_ret
;
633 QLIST_REMOVE(state
, node
);
639 /* If not found in complete list too, invalid token */
645 * Input: drc_index, continue-token
646 * Out: continue-token
647 * Return Value: H_SUCCESS, H_Parameter, H_P2, H_LONG_BUSY_ORDER_10_MSEC,
650 * Given a DRC index, flush the data to the backend NVDIMM device. The hcall
651 * returns H_LONG_BUSY_ORDER_10_MSEC when the flush takes longer and the hcall
652 * needs to be issued multiple times in order to be completely serviced. The
653 * continue-token from the output is to be passed in the argument list of
654 * subsequent hcalls until the hcall is completely serviced, at which point
655 * H_SUCCESS or another error is returned.
/*
 * Hypercall handler registered for H_SCM_FLUSH.
 *
 * Inputs taken from args: [0] DRC index, [1] continue token.
 *
 * The DRC must exist, have a device attached and be of PMEM type, and the
 * device must be a spapr-nvdimm. With a zero continue token a new request is
 * started: H_UNSUPPORTED is returned when the backend is pmem without
 * pmem-override (and in one further case whose condition is not visible
 * here); otherwise a new flush state is created, tagged with the DRC index
 * and submitted to the thread pool, and its token becomes the continue
 * token. The status for the token is then queried, and when it is a
 * long-busy code the token is written to args[0].
 * The remaining return statements are not visible in this excerpt.
 */
657 static target_ulong
h_scm_flush(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
658 target_ulong opcode
, target_ulong
*args
)
661 uint32_t drc_index
= args
[0];
662 uint64_t continue_token
= args
[1];
663 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
665 HostMemoryBackend
*backend
= NULL
;
666 SpaprNVDIMMDeviceFlushState
*state
;
669 if (!drc
|| !drc
->dev
||
670 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
674 dimm
= PC_DIMM(drc
->dev
);
675 if (!object_dynamic_cast(OBJECT(dimm
), TYPE_SPAPR_NVDIMM
)) {
/* A zero token means this is a new flush request. */
678 if (continue_token
== 0) {
679 bool is_pmem
= false, pmem_override
= false;
680 backend
= MEMORY_BACKEND(dimm
->hostmem
);
681 fd
= memory_region_get_fd(&backend
->mr
);
684 return H_UNSUPPORTED
;
687 is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
688 pmem_override
= object_property_get_bool(OBJECT(dimm
),
689 "pmem-override", NULL
);
690 if (is_pmem
&& !pmem_override
) {
691 return H_UNSUPPORTED
;
694 state
= spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm
));
699 state
->drcidx
= drc_index
;
701 thread_pool_submit_aio(flush_worker_cb
, state
,
702 spapr_nvdimm_flush_completion_cb
, state
);
704 continue_token
= state
->continue_token
;
707 ret
= spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm
), continue_token
);
/* Hand the token back so the caller can poll again. */
708 if (H_IS_LONG_BUSY(ret
)) {
709 args
[0] = continue_token
;
/*
 * Hypercall handler registered for H_SCM_UNBIND_MEM.
 *
 * Inputs taken from args: [0] DRC index, [1] starting SCM logical address,
 * [2] number of SCM blocks to unbind, [3] continue token.
 *
 * The DRC must exist, have a device attached and be of PMEM type. The
 * continue token is tested for being greater than zero, the start address
 * for alignment to SPAPR_MINIMUM_SCM_BLOCK_SIZE, and the block count for
 * being non-zero and not overflowing when multiplied by the block size. The
 * requested range must lie within the range built from the device's address
 * and size properties. The block count is written to args[1].
 * The return codes are not visible in this excerpt.
 */
715 static target_ulong
h_scm_unbind_mem(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
716 target_ulong opcode
, target_ulong
*args
)
718 uint32_t drc_index
= args
[0];
719 uint64_t starting_scm_logical_addr
= args
[1];
720 uint64_t no_of_scm_blocks_to_unbind
= args
[2];
721 uint64_t continue_token
= args
[3];
722 uint64_t size_to_unbind
;
723 Range blockrange
= range_empty
;
724 Range nvdimmrange
= range_empty
;
725 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
726 NVDIMMDevice
*nvdimm
;
729 if (!drc
|| !drc
->dev
||
730 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
734 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
735 if (continue_token
> 0) {
739 /* Check if starting_scm_logical_addr is block aligned */
740 if (!QEMU_IS_ALIGNED(starting_scm_logical_addr
,
741 SPAPR_MINIMUM_SCM_BLOCK_SIZE
)) {
/* Dividing the product back detects overflow of the multiplication. */
745 size_to_unbind
= no_of_scm_blocks_to_unbind
* SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
746 if (no_of_scm_blocks_to_unbind
== 0 || no_of_scm_blocks_to_unbind
!=
747 size_to_unbind
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
) {
751 nvdimm
= NVDIMM(drc
->dev
);
752 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
754 addr
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_ADDR_PROP
,
757 range_init_nofail(&nvdimmrange
, addr
, size
);
758 range_init_nofail(&blockrange
, starting_scm_logical_addr
, size_to_unbind
);
760 if (!range_contains_range(&nvdimmrange
, &blockrange
)) {
764 args
[1] = no_of_scm_blocks_to_unbind
;
766 /* let unplug take care of actual unbind */
/* Target-scope values compared against args[0] in h_scm_unbind_all(). */
770 #define H_UNBIND_SCOPE_ALL 0x1
771 #define H_UNBIND_SCOPE_DRC 0x2
/*
 * Hypercall handler registered for H_SCM_UNBIND_ALL.
 *
 * Inputs taken from args: [0] target scope, [1] DRC index,
 * [2] continue token.
 *
 * The continue token is tested for being greater than zero. With scope
 * H_UNBIND_SCOPE_DRC, the DRC must exist, have a device attached and be of
 * PMEM type, and the block count is that device's size divided by
 * SPAPR_MINIMUM_SCM_BLOCK_SIZE. With scope H_UNBIND_SCOPE_ALL, the block
 * counts of all devices from nvdimm_get_device_list() are summed. The total
 * is written to args[1].
 * The return codes and the handling of other scope values are not visible
 * in this excerpt.
 */
773 static target_ulong
h_scm_unbind_all(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
774 target_ulong opcode
, target_ulong
*args
)
776 uint64_t target_scope
= args
[0];
777 uint32_t drc_index
= args
[1];
778 uint64_t continue_token
= args
[2];
779 NVDIMMDevice
*nvdimm
;
781 uint64_t no_of_scm_blocks_unbound
= 0;
783 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
784 if (continue_token
> 0) {
/* Single-device scope: count the blocks of the device behind the DRC. */
788 if (target_scope
== H_UNBIND_SCOPE_DRC
) {
789 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
791 if (!drc
|| !drc
->dev
||
792 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
796 nvdimm
= NVDIMM(drc
->dev
);
797 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
800 no_of_scm_blocks_unbound
= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
/* Global scope: accumulate the blocks of every listed NVDIMM. */
801 } else if (target_scope
== H_UNBIND_SCOPE_ALL
) {
802 GSList
*list
, *nvdimms
;
804 nvdimms
= nvdimm_get_device_list();
805 for (list
= nvdimms
; list
; list
= list
->next
) {
807 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
810 no_of_scm_blocks_unbound
+= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
812 g_slist_free(nvdimms
);
817 args
[1] = no_of_scm_blocks_unbound
;
819 /* let unplug take care of actual unbind */
/*
 * Hypercall handler registered for H_SCM_HEALTH.
 *
 * Input taken from args: [0] DRC index.
 *
 * The DRC must exist, have a device attached and be of PMEM type. The
 * health bitmap starts at zero and gets PAPR_PMEM_UNARMED set when the
 * device's NVDIMM_UNARMED_PROP property is true. The mask, which contains
 * only PAPR_PMEM_UNARMED, is written to args[1].
 * The store of the bitmap itself and the return codes are not visible in
 * this excerpt.
 */
823 static target_ulong
h_scm_health(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
824 target_ulong opcode
, target_ulong
*args
)
827 NVDIMMDevice
*nvdimm
;
828 uint64_t hbitmap
= 0;
829 uint32_t drc_index
= args
[0];
830 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
831 const uint64_t hbitmap_mask
= PAPR_PMEM_UNARMED
;
834 /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */
835 if (!drc
|| !drc
->dev
||
836 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
840 nvdimm
= NVDIMM(drc
->dev
);
842 /* Update if the nvdimm is unarmed and send its status via health bitmaps */
843 if (object_property_get_bool(OBJECT(nvdimm
), NVDIMM_UNARMED_PROP
, NULL
)) {
844 hbitmap
|= PAPR_PMEM_UNARMED
;
847 /* Update the out args with health bitmap/mask */
849 args
[1] = hbitmap_mask
;
/*
 * Register the seven SCM hypercall handlers defined in this file:
 * read/write metadata, bind, unbind, unbind-all, health and flush.
 * The function is hooked up through type_init().
 */
854 static void spapr_scm_register_types(void)
856 /* qemu/scm specific hcalls */
857 spapr_register_hypercall(H_SCM_READ_METADATA
, h_scm_read_metadata
);
858 spapr_register_hypercall(H_SCM_WRITE_METADATA
, h_scm_write_metadata
);
859 spapr_register_hypercall(H_SCM_BIND_MEM
, h_scm_bind_mem
);
860 spapr_register_hypercall(H_SCM_UNBIND_MEM
, h_scm_unbind_mem
);
861 spapr_register_hypercall(H_SCM_UNBIND_ALL
, h_scm_unbind_all
);
862 spapr_register_hypercall(H_SCM_HEALTH
, h_scm_health
);
863 spapr_register_hypercall(H_SCM_FLUSH
, h_scm_flush
);
866 type_init(spapr_scm_register_types
)
/*
 * Realize hook installed on the NVDIMM class by spapr_nvdimm_class_init().
 *
 * dimm: device being realized
 * errp: not referenced in the visible code
 *
 * Sets hcall_flush_required when the backend's "pmem" property is false or
 * the device's "pmem-override" property is true, then registers
 * vmstate_spapr_nvdimm_states for the device.
 */
868 static void spapr_nvdimm_realize(NVDIMMDevice
*dimm
, Error
**errp
)
870 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(dimm
);
871 HostMemoryBackend
*backend
= MEMORY_BACKEND(PC_DIMM(dimm
)->hostmem
);
872 bool is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
873 bool pmem_override
= object_property_get_bool(OBJECT(dimm
), "pmem-override",
875 if (!is_pmem
|| pmem_override
) {
876 s_nvdimm
->hcall_flush_required
= true;
879 vmstate_register_any(NULL
, &vmstate_spapr_nvdimm_states
, dimm
);
/*
 * Unrealize hook installed on the NVDIMM class by spapr_nvdimm_class_init().
 * Unregisters the vmstate that spapr_nvdimm_realize() registered for dimm.
 */
882 static void spapr_nvdimm_unrealize(NVDIMMDevice
*dimm
)
884 vmstate_unregister(NULL
, &vmstate_spapr_nvdimm_states
, dimm
);
/*
 * Property table for the spapr-nvdimm type. The boolean "pmem-override"
 * property defaults to false and is backed by the pmem_override member of
 * SpaprNVDIMMDevice.
 *
 * NOTE(review): the property is defined only under CONFIG_LIBPMEM, while
 * other code in this file reads "pmem-override" unconditionally with a NULL
 * error pointer. Confirm the lookup yields false when the property does not
 * exist.
 */
887 static Property spapr_nvdimm_properties
[] = {
888 #ifdef CONFIG_LIBPMEM
889 DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice
, pmem_override
, false),
891 DEFINE_PROP_END_OF_LIST(),
/*
 * Class initializer for the spapr-nvdimm type.
 *
 * oc:   class being initialized
 * data: not referenced in the visible code
 *
 * Installs the realize and unrealize hooks on the NVDIMM class and attaches
 * spapr_nvdimm_properties to the device class.
 */
894 static void spapr_nvdimm_class_init(ObjectClass
*oc
, void *data
)
896 DeviceClass
*dc
= DEVICE_CLASS(oc
);
897 NVDIMMClass
*nvc
= NVDIMM_CLASS(oc
);
899 nvc
->realize
= spapr_nvdimm_realize
;
900 nvc
->unrealize
= spapr_nvdimm_unrealize
;
902 device_class_set_props(dc
, spapr_nvdimm_properties
);
/*
 * Instance initializer for the spapr-nvdimm type: clears
 * hcall_flush_required and initializes the pending and completed
 * flush-state lists as empty.
 */
905 static void spapr_nvdimm_init(Object
*obj
)
907 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(obj
);
909 s_nvdimm
->hcall_flush_required
= false;
910 QLIST_INIT(&s_nvdimm
->pending_nvdimm_flush_states
);
911 QLIST_INIT(&s_nvdimm
->completed_nvdimm_flush_states
);
/*
 * Type description for "spapr-nvdimm": a child of TYPE_NVDIMM using
 * SPAPRNVDIMMClass as its class structure and SpaprNVDIMMDevice as its
 * instance structure, with the class and instance initializers above.
 */
914 static TypeInfo spapr_nvdimm_info
= {
915 .name
= TYPE_SPAPR_NVDIMM
,
916 .parent
= TYPE_NVDIMM
,
917 .class_init
= spapr_nvdimm_class_init
,
918 .class_size
= sizeof(SPAPRNVDIMMClass
),
919 .instance_size
= sizeof(SpaprNVDIMMDevice
),
920 .instance_init
= spapr_nvdimm_init
,
/* Register the spapr-nvdimm type; hooked up through type_init(). */
923 static void spapr_nvdimm_register_types(void)
925 type_register_static(&spapr_nvdimm_info
);
928 type_init(spapr_nvdimm_register_types
)