/*
 * QEMU NVM Express End-to-End Data Protection support
 *
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Klaus Jensen <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 */

#include "qemu/osdep.h"
#include "hw/block/block.h"
#include "sysemu/dma.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
#include "trace.h"
#include "nvme.h"
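
/*
 * Validate the protection information settings of a command against the
 * namespace DIF type. For Type 1 protection, the Reference Tag must equal
 * the least significant 32 bits of the starting LBA whenever reference tag
 * checking is requested.
 */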
uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba,
                           uint32_t reftag)
{
    if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
        (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
        return NVME_INVALID_PROT_INFO | NVME_DNR;
    }

    return NVME_SUCCESS;
}
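
/*
 * CRC16 as used by the T10 Data Integrity Field. The t10_dif_crc_table
 * lookup table is assumed to be provided by one of the included headers.
 */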
/* from Linux kernel (crypto/crct10dif_common.c) */
static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
                           size_t len)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
    }

    return crc;
}
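
/*
 * Generate the DIF tuple (guard CRC, application tag and reference tag) for
 * every logical block in the data buffer, storing it at the end of the
 * per-block metadata, or at its start if the tuple occupies the first eight
 * bytes of metadata.
 */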
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint32_t reftag)
{
    uint8_t *end = buf + len;
    size_t lsize = nvme_lsize(ns);
    size_t msize = nvme_msize(ns);
    int16_t pil = 0;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag,
                                          reftag);

    for (; buf < end; buf += lsize, mbuf += msize) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
        uint16_t crc = crc_t10dif(0x0, buf, lsize);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        dif->guard = cpu_to_be16(crc);
        dif->apptag = cpu_to_be16(apptag);
        dif->reftag = cpu_to_be32(reftag);

        /* Types 1 and 2 use an incrementing reference tag */
        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            reftag++;
        }
    }
}
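
/*
 * Check a single DIF tuple against the expected guard, application and
 * reference tags. Checking is disabled when the tuple carries the escape
 * values defined by the spec: an application tag of 0xffff, and, for Type 3
 * namespaces, additionally a reference tag of 0xffffffff.
 */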
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
                               uint8_t *buf, uint8_t *mbuf, size_t pil,
                               uint16_t ctrl, uint16_t apptag,
                               uint16_t appmask, uint32_t reftag)
{
    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        if (be32_to_cpu(dif->reftag) != 0xffffffff) {
            break;
        }

        /* fallthrough */
    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        if (be16_to_cpu(dif->apptag) != 0xffff) {
            break;
        }

        trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
                                          be32_to_cpu(dif->reftag));

        return NVME_SUCCESS;
    }

    if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) {
        uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns));

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);

        if (be16_to_cpu(dif->guard) != crc) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (ctrl & NVME_RW_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
                                        appmask);

        if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (ctrl & NVME_RW_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);

        if (be32_to_cpu(dif->reftag) != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}
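
/*
 * Verify the protection information for each logical block in the bounce
 * buffer, advancing the expected reference tag for Type 1 and Type 2
 * namespaces.
 */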
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint16_t ctrl,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint32_t reftag)
{
    uint8_t *end = buf + len;
    size_t lsize = nvme_lsize(ns);
    size_t msize = nvme_msize(ns);
    int16_t pil = 0;
    uint16_t status;

    status = nvme_check_prinfo(ns, ctrl, slba, reftag);
    if (status) {
        return status;
    }

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil);

    for (; buf < end; buf += lsize, mbuf += msize) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

        status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag,
                                appmask, reftag);
        if (status) {
            return status;
        }

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            reftag++;
        }
    }

    return NVME_SUCCESS;
}
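
/*
 * The block layer reports unwritten ("zeroed") regions, and reading those
 * returns zeroes rather than valid protection information. Mark the DIF
 * tuples of any such blocks with the all-ones escape values so that later
 * checking treats them as deallocated.
 */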
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba)
{
    BlockBackend *blk = ns->blkconf.blk;
    BlockDriverState *bs = blk_bs(blk);

    size_t msize = nvme_msize(ns);
    size_t lsize = nvme_lsize(ns);
    int64_t moffset = 0, offset = nvme_l2b(ns, slba);
    uint8_t *mbufp, *end;
    bool zeroed;
    int16_t pil = 0;
    int64_t bytes = (mlen / msize) * lsize;
    int64_t pnum = 0;

    Error *err = NULL;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = nvme_msize(ns) - sizeof(NvmeDifTuple);
    }

    do {
        int ret;

        /* advance past the extent covered by the previous iteration */
        bytes -= pnum;

        ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
        if (ret < 0) {
            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        zeroed = !!(ret & BDRV_BLOCK_ZERO);

        trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);

        if (zeroed) {
            mbufp = mbuf + moffset;
            mlen = (pnum / lsize) * msize;
            end = mbufp + mlen;

            for (; mbufp < end; mbufp += msize) {
                memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
            }
        }

        moffset += (pnum / lsize) * msize;
        offset += pnum;
    } while (pnum != bytes);

    return NVME_SUCCESS;
}
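
/*
 * Final completion callback: release the bounce buffers and hand the
 * request back to the common read/write completion path.
 */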
static void nvme_dif_rw_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));

    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    nvme_rw_complete_cb(req, ret);
}
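
/*
 * Read path, step 2: both data and metadata have been read into the bounce
 * buffers. Mangle the metadata of deallocated blocks, verify the protection
 * information and only then copy the result out to the host.
 */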
static void nvme_dif_rw_check_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint16_t ctrl = le16_to_cpu(rw->control);
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    uint16_t status;

    trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag,
                                   appmask, reftag);

    if (ret) {
        goto out;
    }

    status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   slba);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                            ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
                            slba, apptag, appmask, reftag);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    /* with PRACT and 8 bytes of metadata, the PI is stripped, not returned */
    if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) {
        goto out;
    }

    status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                               NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
    }

out:
    nvme_dif_rw_cb(ctx, ret);
}
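
/*
 * Read path, step 1: the data portion has been read; allocate a metadata
 * bounce buffer and read the corresponding metadata before checking.
 */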
static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    size_t mlen = nvme_m2b(ns, nlb);
    uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_reset(&ctx->mdata.iov);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
                                nvme_dif_rw_check_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}
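
/*
 * Write path: the data portion has been written; flush the metadata bounce
 * buffer out to the backing device.
 */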
static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
                                 nvme_dif_rw_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}
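
/*
 * Entry point for reads and writes on protected namespaces. All transfers
 * go through bounce buffers so that protection information can be
 * generated, checked or stripped as dictated by the PRINFO field.
 */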
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    uint64_t slba = le64_to_cpu(rw->slba);
    size_t len = nvme_l2b(ns, nlb);
    size_t mlen = nvme_m2b(ns, nlb);
    size_t mapped_len = len;
    int64_t offset = nvme_l2b(ns, slba);
    uint16_t ctrl = le16_to_cpu(rw->control);
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT);
    NvmeBounceContext *ctx;
    uint16_t status;

    trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl));

    ctx = g_new0(NvmeBounceContext, 1);
    ctx->req = req;
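
    /*
     * Write Zeroes with PRACT cannot leave the metadata area zeroed:
     * pre-generate the DIF tuples into a metadata bounce buffer and write
     * it out through the regular metadata-out path.
     */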
    if (wrz) {
        BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;

        if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) {
            status = NVME_INVALID_PROT_INFO | NVME_DNR;
            goto err;
        }

        if (pract) {
            uint8_t *mbuf, *end;
            size_t msize = nvme_msize(ns);
            int16_t pil = msize - sizeof(NvmeDifTuple);

            status = nvme_check_prinfo(ns, ctrl, slba, reftag);
            if (status) {
                goto err;
            }

            /* writing generated PI alongside the zeroes, so do not unmap */
            flags = 0;

            ctx->mdata.bounce = g_malloc0(mlen);

            qemu_iovec_init(&ctx->mdata.iov, 1);
            qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

            mbuf = ctx->mdata.bounce;
            end = mbuf + mlen;

            if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
                pil = 0;
            }

            for (; mbuf < end; mbuf += msize) {
                NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

                /* the guard (CRC of an all-zero block) is 0x0000, which
                 * g_malloc0 already provides */
                dif->apptag = cpu_to_be16(apptag);
                dif->reftag = cpu_to_be32(reftag);

                switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
                case NVME_ID_NS_DPS_TYPE_1:
                case NVME_ID_NS_DPS_TYPE_2:
                    reftag++;
                }
            }
        }

        req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
                                           nvme_dif_rw_mdata_out_cb, ctx);

        return NVME_NO_COMPLETE;
    }
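
    /*
     * For extended-LBA namespaces the host buffer carries metadata inline
     * with the data, so the mapped transfer must cover both. The exception
     * is PRACT with 8 bytes of metadata, where no protection information
     * crosses the host interface at all.
     */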
    if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) {
        mapped_len += mlen;
    }

    status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
    if (status) {
        return status;
    }

    ctx->data.bounce = g_malloc(len);

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);

    if (req->cmd.opcode == NVME_CMD_READ) {
        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                         BLOCK_ACCT_READ);

        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                    nvme_dif_rw_mdata_in_cb, ctx);

        return NVME_NO_COMPLETE;
    }
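
    /*
     * Write path: copy the data (and, unless the controller generates the
     * PI itself, the metadata) in from the host, then either generate
     * protection information (PRACT set) or verify what the host supplied
     * before issuing the backing write.
     */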
    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        goto err;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_init(&ctx->mdata.iov, 1);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    if (!(pract && nvme_msize(ns) == 8)) {
        status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   NVME_TX_DIRECTION_TO_DEVICE, req);
        if (status) {
            goto err;
        }
    }

    status = nvme_check_prinfo(ns, ctrl, slba, reftag);
    if (status) {
        goto err;
    }

    if (pract) {
        /* splice generated protection information into the buffer */
        nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
                                    ctx->mdata.bounce, ctx->mdata.iov.size,
                                    apptag, reftag);
    } else {
        status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                ctx->mdata.bounce, ctx->mdata.iov.size, ctrl,
                                slba, apptag, appmask, reftag);
        if (status) {
            goto err;
        }
    }

    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                     BLOCK_ACCT_WRITE);

    req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                 nvme_dif_rw_mdata_out_cb, ctx);

    return NVME_NO_COMPLETE;
err:
    /* release the bounce state and fail the command synchronously */
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    return status;
}