/*
 * QEMU NVM Express End-to-End Data Protection support
 *
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"

#include "nvme.h"
#include "trace.h"

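/*
 * Validate the protection information settings of the command. For a Type 1
 * protected namespace, a reference tag check requires that the initial
 * reference tag equal the lower 32 bits of the starting LBA.
 */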
uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
                           uint32_t reftag)
{
    if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
        (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
        return NVME_INVALID_PROT_INFO | NVME_DNR;
    }

    return NVME_SUCCESS;
}

/* from Linux kernel (crypto/crct10dif_common.c) */
static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
                           size_t len)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
    }

    return crc;
}

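/*
 * Generate protection information for each logical block in the buffer. The
 * guard is a CRC16 over the data and, if the tuple is not located in the
 * first eight bytes of the metadata, over the metadata preceding it. For
 * Type 1 and 2, the reference tag is incremented per block.
 */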
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint32_t *reftag)
{
    uint8_t *end = buf + len;
    int16_t pil = 0;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
                                          apptag, *reftag);

    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        dif->guard = cpu_to_be16(crc);
        dif->apptag = cpu_to_be16(apptag);
        dif->reftag = cpu_to_be32(*reftag);

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }
}

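/*
 * Check the protection information of a single logical block. Checking is
 * skipped when the tuple carries the "all ones" escape values: an apptag of
 * 0xffff for Type 1 and 2 (and, for Type 3, additionally a reftag of
 * 0xffffffff).
 */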
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
                               uint8_t *buf, uint8_t *mbuf, size_t pil,
                               uint8_t prinfo, uint16_t apptag,
                               uint16_t appmask, uint32_t reftag)
{
    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        if (be32_to_cpu(dif->reftag) != 0xffffffff) {
            break;
        }

        /* fallthrough */
    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        if (be16_to_cpu(dif->apptag) != 0xffff) {
            break;
        }

        trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
                                          be32_to_cpu(dif->reftag));

        return NVME_SUCCESS;
    }

    if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
        uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            crc = crc_t10dif(crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);

        if (be16_to_cpu(dif->guard) != crc) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
                                        appmask);

        if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);

        if (be32_to_cpu(dif->reftag) != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}

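/*
 * Verify protection information for a range of logical blocks, advancing the
 * reference tag per block for Type 1 and 2 namespaces.
 */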
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint8_t prinfo,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint32_t *reftag)
{
    uint8_t *end = buf + len;
    int16_t pil = 0;
    uint16_t status;

    status = nvme_check_prinfo(ns, prinfo, slba, *reftag);
    if (status) {
        return status;
    }

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);

    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

        status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag,
                                appmask, *reftag);
        if (status) {
            return status;
        }

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }

    return NVME_SUCCESS;
}

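/*
 * Blocks that are unallocated or zeroed in the backing image have no stored
 * protection information and would fail the per-block checks. Patch the
 * tuples in such ranges to the "all ones" escape values so that checking is
 * disabled for them (see nvme_dif_prchk).
 */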
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba)
{
    BlockBackend *blk = ns->blkconf.blk;
    BlockDriverState *bs = blk_bs(blk);

    int64_t moffset = 0, offset = nvme_l2b(ns, slba);
    uint8_t *mbufp, *end;
    bool zeroed;
    int16_t pil = 0;
    int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
    int64_t pnum = 0;

    Error *err = NULL;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
    }

    do {
        int ret;

        bytes -= pnum;

        ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL);
        if (ret < 0) {
            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        zeroed = !!(ret & BDRV_BLOCK_ZERO);

        trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed);

        if (zeroed) {
            mbufp = mbuf + moffset;
            mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
            end = mbufp + mlen;

            for (; mbufp < end; mbufp += ns->lbaf.ms) {
                memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
            }
        }

        moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms;
        offset += pnum;
    } while (pnum != bytes);

    return NVME_SUCCESS;
}

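/*
 * Final callback of the read/write chain; releases the bounce buffers and
 * context and completes the request.
 */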
static void nvme_dif_rw_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));

    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    nvme_rw_complete_cb(req, ret);
}

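/*
 * Invoked when the metadata read of a read command completes; verifies the
 * protection information and copies data (and metadata, unless the
 * controller strips the protection information) back to the host.
 */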
static void nvme_dif_rw_check_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeCtrl *n = nvme_ctrl(req);
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    uint16_t status;

    trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
                                   reftag);

    if (ret) {
        goto out;
    }

    status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   slba);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                            ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                            slba, apptag, appmask, &reftag);
    if (status) {
        req->status = status;
        goto out;
    }

    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
        goto out;
    }

    if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
        goto out;
    }

    status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                               NVME_TX_DIRECTION_FROM_DEVICE, req);
    if (status) {
        req->status = status;
    }

out:
    nvme_dif_rw_cb(ctx, ret);
}

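/*
 * Invoked when the data read completes; allocates a bounce buffer for the
 * metadata and kicks off the metadata read.
 */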
static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    size_t mlen = nvme_m2b(ns, nlb);
    uint64_t offset = nvme_moff(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_reset(&ctx->mdata.iov);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
                                nvme_dif_rw_check_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

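/*
 * Invoked when the data write completes; writes out the metadata, including
 * any generated protection information.
 */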
static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
{
    NvmeBounceContext *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeNamespace *ns = req->ns;
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint64_t offset = nvme_moff(ns, slba);
    BlockBackend *blk = ns->blkconf.blk;

    trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));

    if (ret) {
        goto out;
    }

    req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
                                 nvme_dif_rw_cb, ctx);
    return;

out:
    nvme_dif_rw_cb(ctx, ret);
}

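/*
 * Entry point for reads and writes on protected namespaces. Data and
 * metadata are staged in bounce buffers so that protection information can
 * be generated or verified before the request completes.
 */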
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
    NvmeNamespace *ns = req->ns;
    BlockBackend *blk = ns->blkconf.blk;
    bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
    uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
    uint64_t slba = le64_to_cpu(rw->slba);
    size_t len = nvme_l2b(ns, nlb);
    size_t mlen = nvme_m2b(ns, nlb);
    size_t mapped_len = len;
    int64_t offset = nvme_l2b(ns, slba);
    uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
    uint16_t apptag = le16_to_cpu(rw->apptag);
    uint16_t appmask = le16_to_cpu(rw->appmask);
    uint32_t reftag = le32_to_cpu(rw->reftag);
    bool pract = !!(prinfo & NVME_PRINFO_PRACT);
    NvmeBounceContext *ctx;
    uint16_t status;

    trace_pci_nvme_dif_rw(pract, prinfo);

    ctx = g_new0(NvmeBounceContext, 1);
    ctx->req = req;

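    /*
     * Write Zeroes transfers no data, but with PRACT set the protection
     * information must still be generated and written out alongside the
     * zeroed blocks, so a metadata bounce buffer is prepared here.
     */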
    if (wrz) {
        BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;

        if (prinfo & NVME_PRINFO_PRCHK_MASK) {
            status = NVME_INVALID_PROT_INFO | NVME_DNR;
            goto err;
        }

        if (pract) {
            uint8_t *mbuf, *end;
            int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);

            status = nvme_check_prinfo(ns, prinfo, slba, reftag);
            if (status) {
                goto err;
            }

            flags = 0;

            ctx->mdata.bounce = g_malloc0(mlen);

            qemu_iovec_init(&ctx->mdata.iov, 1);
            qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

            mbuf = ctx->mdata.bounce;
            end = mbuf + mlen;

            if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
                pil = 0;
            }

            for (; mbuf < end; mbuf += ns->lbaf.ms) {
                NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);

                dif->apptag = cpu_to_be16(apptag);
                dif->reftag = cpu_to_be32(reftag);

                switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
                case NVME_ID_NS_DPS_TYPE_1:
                case NVME_ID_NS_DPS_TYPE_2:
                    reftag++;
                }
            }
        }

        req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
                                           nvme_dif_rw_mdata_out_cb, ctx);
        return NVME_NO_COMPLETE;
    }

    if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
        mapped_len += mlen;
    }

    status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
    if (status) {
        goto err;
    }

    ctx->data.bounce = g_malloc(len);

    qemu_iovec_init(&ctx->data.iov, 1);
    qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);

    if (req->cmd.opcode == NVME_CMD_READ) {
        block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                         BLOCK_ACCT_READ);

        req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                    nvme_dif_rw_mdata_in_cb, ctx);
        return NVME_NO_COMPLETE;
    }

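    /*
     * Write path: copy the data (and metadata) in from the host, then either
     * generate protection information (PRACT) or verify what the host
     * supplied before issuing the write.
     */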
    status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
                              NVME_TX_DIRECTION_TO_DEVICE, req);
    if (status) {
        goto err;
    }

    ctx->mdata.bounce = g_malloc(mlen);

    qemu_iovec_init(&ctx->mdata.iov, 1);
    qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);

    if (!(pract && ns->lbaf.ms == 8)) {
        status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
                                   NVME_TX_DIRECTION_TO_DEVICE, req);
        if (status) {
            goto err;
        }
    }

    status = nvme_check_prinfo(ns, prinfo, slba, reftag);
    if (status) {
        goto err;
    }

    if (pract) {
        /* splice generated protection information into the buffer */
        nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
                                    ctx->mdata.bounce, ctx->mdata.iov.size,
                                    apptag, &reftag);
    } else {
        status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
                                ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
                                slba, apptag, appmask, &reftag);
        if (status) {
            goto err;
        }
    }

    block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
                     BLOCK_ACCT_WRITE);

    req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
                                 nvme_dif_rw_mdata_out_cb, ctx);

    return NVME_NO_COMPLETE;

err:
    qemu_iovec_destroy(&ctx->data.iov);
    g_free(ctx->data.bounce);

    qemu_iovec_destroy(&ctx->mdata.iov);
    g_free(ctx->mdata.bounce);

    g_free(ctx);

    return status;
}