/*
 * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "nvme.h"

static void nvme_disk_callback(nvme_request_t *req, struct lock *lk);
static int nvme_strategy_core(nvme_softns_t *nsc, struct bio *bio, int delay);

static d_open_t nvme_open;
static d_close_t nvme_close;
static d_ioctl_t nvme_ioctl;
static d_strategy_t nvme_strategy;
static d_dump_t nvme_dump;
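
/*
 * Disk device ops.  D_CANFREE advertises BUF_CMD_FREEBLKS support
 * (TRIM-style block deallocation, handled below via NVMe write-zeroes)
 * and D_TRACKCLOSE ensures every close() reaches nvme_close so the
 * opencnt accounting stays balanced.
 */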
static struct dev_ops nvme_ops = {
	{ "nvme", 0, D_DISK | D_MPSAFE | D_CANFREE | D_TRACKCLOSE },
	.d_open =	nvme_open,
	.d_close =	nvme_close,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_ioctl =	nvme_ioctl,
	.d_strategy =	nvme_strategy,
	.d_dump =	nvme_dump,
};

static int nvme_sync_delay = 0;
SYSCTL_INT(_debug, OID_AUTO, nvme_sync_delay, CTLFLAG_RW, &nvme_sync_delay, 0,
	   "Enable synchronous delay/completion-check, uS");

/*
 * Attach a namespace as a disk, making the disk available to the system.
 */
void
nvme_disk_attach(nvme_softns_t *nsc)
{
	nvme_softc_t *sc;
	struct disk_info info;
	char serial[20+16];
	size_t len;
	uint64_t cap_gb;

	sc = nsc->sc;
	devstat_add_entry(&nsc->stats, "nvme", nsc->unit, nsc->blksize,
			  DEVSTAT_NO_ORDERED_TAGS,
			  DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
			  DEVSTAT_PRIORITY_OTHER);
	nsc->cdev = disk_create(nsc->unit, &nsc->disk, &nvme_ops);
	nsc->cdev->si_drv1 = nsc;
	nsc->cdev->si_iosize_max = MAXPHYS;	/* XXX */
	disk_setdisktype(&nsc->disk, "ssd");
	bzero(&info, sizeof(info));
	info.d_media_blksize = nsc->blksize;
	info.d_media_blocks = nsc->idns.size;
	info.d_secpertrack = 1024;
	info.d_nheads = 1;
	info.d_secpercyl = info.d_secpertrack * info.d_nheads;
	info.d_ncylinders = (u_int)(info.d_media_blocks / info.d_secpercyl);
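
	/*
	 * The controller serial number applies to the whole controller;
	 * append the namespace id so each attached disk reports a unique
	 * serial.
	 */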
	KKASSERT(sizeof(sc->idctlr.serialno) == 20);
	bzero(serial, sizeof(serial));
	bcopy(sc->idctlr.serialno, serial, sizeof(sc->idctlr.serialno));
	len = string_cleanup(serial, 1);

	ksnprintf(serial + len, sizeof(serial) - len, "-%u", nsc->nsid);

	info.d_serialno = serial;

	cap_gb = nsc->idns.size / (1024 * 1024 * 1024 / nsc->blksize);
	device_printf(sc->dev,
		      "Disk nvme%d ns=%u "
		      "blksize=%u lbacnt=%ju cap=%juGB serno=%s\n",
		      nsc->unit, nsc->nsid,
		      nsc->blksize, nsc->idns.size, cap_gb, serial);

	disk_setdiskinfo(&nsc->disk, &info);
	/* serial is copied and does not have to be persistent */
}

void
nvme_disk_detach(nvme_softns_t *nsc)
{
	if (nsc->cdev) {
		disk_destroy(&nsc->disk);
		devstat_remove_entry(&nsc->stats);
	}
}

static
int
nvme_open(struct dev_open_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	nvme_softns_t *nsc = dev->si_drv1;
	nvme_softc_t *sc = nsc->sc;

	if (sc->flags & NVME_SC_UNLOADING)
		return ENXIO;

	atomic_add_long(&sc->opencnt, 1);

	return 0;
}

static
int
nvme_close(struct dev_close_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	nvme_softns_t *nsc = dev->si_drv1;
	nvme_softc_t *sc = nsc->sc;

	atomic_add_long(&sc->opencnt, -1);

	return 0;
}

static int
nvme_ioctl(struct dev_ioctl_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	nvme_softns_t *nsc = dev->si_drv1;
	nvme_softc_t *sc = nsc->sc;
	int error;

	switch(ap->a_cmd) {
	case NVMEIOCGETLOG:
		error = nvme_getlog_ioctl(sc, (void *)ap->a_data);
		break;
	default:
		error = ENOIOCTL;
		break;
	}
	return error;
}

static int
nvme_strategy(struct dev_strategy_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	nvme_softns_t *nsc = dev->si_drv1;

	nvme_strategy_core(nsc, ap->a_bio, nvme_sync_delay);

	return 0;
}

/*
 * Called from admin thread to requeue BIOs.  We must call
 * nvme_strategy_core() with delay = 0 to disable synchronous
 * optimizations to avoid deadlocking the admin thread.
 */
void
nvme_disk_requeues(nvme_softc_t *sc)
{
	nvme_softns_t *nsc;
	struct bio *bio;
	int i;

	for (i = 0; i < sc->nscmax; ++i) {
		nsc = sc->nscary[i];
		if (nsc == NULL || nsc->sc == NULL)
			continue;
		if (bioq_first(&nsc->bioq)) {
			lockmgr(&nsc->lk, LK_EXCLUSIVE);
			while ((bio = bioq_first(&nsc->bioq)) != NULL) {
				bioq_remove(&nsc->bioq, bio);
				lockmgr(&nsc->lk, LK_RELEASE);
				if (nvme_strategy_core(nsc, bio, 0))
					return;	/* bio requeued, retry later */
				lockmgr(&nsc->lk, LK_EXCLUSIVE);
			}
			lockmgr(&nsc->lk, LK_RELEASE);
		}
	}
}
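
/*
 * Convert a bio into an NVMe command and submit it to a submission
 * queue.  If no request structures are available the bio is queued
 * on the namespace and the admin thread takes over via
 * nvme_disk_requeues().
 */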
/*
 * Returns non-zero if no requests are available.
 */
static int
nvme_strategy_core(nvme_softns_t *nsc, struct bio *bio, int delay)
{
	nvme_softc_t *sc = nsc->sc;
	struct buf *bp = bio->bio_buf;
	uint64_t nlba;
	uint64_t secno;
	nvme_subqueue_t *subq;
	nvme_request_t *req;
	int nobytes;

	/*
	 * Calculate sector/extent
	 */
	secno = bio->bio_offset / nsc->blksize;
	nlba = bp->b_bcount / nsc->blksize;

	devstat_start_transaction(&nsc->stats);

	subq = NULL;
	req = NULL;
	nobytes = 0;
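
	/*
	 * sc->qmap[] maps (current cpu, read/write) to a submission
	 * queue index, letting each cpu direct its reads and writes to
	 * different queues to reduce lock contention.
	 */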
	/*
	 * Convert bio to low-level request
	 */
	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		if (nlba == 0) {
			nobytes = 1;
			break;
		}
		subq = &sc->subqueues[sc->qmap[mycpuid][NVME_QMAP_RD]];
		/* get_request does not need the subq lock */
		req = nvme_get_request(subq, NVME_IOCMD_READ,
				       bp->b_data, nlba * nsc->blksize);
		if (req == NULL)
			goto requeue;

		req->cmd.read.head.nsid = nsc->nsid;
		req->cmd.read.start_lba = secno;
		req->cmd.read.count_lba = nlba - 1;	/* 0's based */
		req->cmd.read.ioflags = 0; /* NVME_IOFLG_LR, NVME_IOFLG_FUA */
		req->cmd.read.dsm = 0;	   /* NVME_DSM_INCOMPRESSIBLE */
					   /* NVME_DSM_SEQREQ */
		break;
	case BUF_CMD_WRITE:
		if (nlba == 0) {
			nobytes = 1;
			break;
		}
		subq = &sc->subqueues[sc->qmap[mycpuid][NVME_QMAP_WR]];
		/* get_request does not need the subq lock */
		req = nvme_get_request(subq, NVME_IOCMD_WRITE,
				       bp->b_data, nlba * nsc->blksize);
		if (req == NULL)
			goto requeue;
		req->cmd.write.head.nsid = nsc->nsid;
		req->cmd.write.start_lba = secno;
		req->cmd.write.count_lba = nlba - 1;	/* 0's based */
		break;
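	/*
	 * BUF_CMD_FREEBLKS (the kernel's TRIM-style block free) is
	 * implemented here with the NVMe write-zeroes command
	 * (NVME_IOCMD_WRITEZ).
	 */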
	case BUF_CMD_FREEBLKS:
		if (nlba == 0) {
			nobytes = 1;
			break;
		}
		subq = &sc->subqueues[sc->qmap[mycpuid][NVME_QMAP_WR]];
		/* get_request does not need the subq lock */
		req = nvme_get_request(subq, NVME_IOCMD_WRITEZ, NULL, 0);
		if (req == NULL)
			goto requeue;
		req->cmd.writez.head.nsid = nsc->nsid;
		req->cmd.writez.start_lba = secno;
		req->cmd.writez.count_lba = nlba - 1;	/* 0's based */
		req->cmd.read.ioflags = 0; /* NVME_IOFLG_LR, NVME_IOFLG_FUA */
		req->cmd.read.dsm = 0;	   /* NVME_DSM_INCOMPRESSIBLE */
					   /* NVME_DSM_SEQREQ */
		break;
	case BUF_CMD_FLUSH:
		subq = &sc->subqueues[sc->qmap[mycpuid][NVME_QMAP_WR]];
		/* get_request does not need the subq lock */
		req = nvme_get_request(subq, NVME_IOCMD_FLUSH, NULL, 0);
		if (req == NULL)
			goto requeue;
		req->cmd.flush.head.nsid = nsc->nsid;
		break;
	default:
		break;
	}

	/*
	 * Submit the request
	 */
	if (req) {
		nvme_comqueue_t *comq;

		/* HACK OPTIMIZATIONS - TODO NEEDS WORK */

		/*
		 * Prevent callback from occurring if the synchronous
		 * delay optimization is enabled.
		 *
		 * NOTE: subq lock does not protect the I/O (completion
		 *	 only needs the comq lock).
		 */
		if (delay == 0)
			req->callback = nvme_disk_callback;
		req->nsc = nsc;
		req->bio = bio;
		BUF_KERNPROC(bp);		/* do before submit */
		lockmgr(&subq->lk, LK_EXCLUSIVE);
		nvme_submit_request(req);	/* needs subq lock */
		lockmgr(&subq->lk, LK_RELEASE);
		if (delay) {
			comq = req->comq;
			DELAY(delay);		/* XXX */
			lockmgr(&comq->lk, LK_EXCLUSIVE);
			nvme_poll_completions(comq, &comq->lk);
			if (req->state == NVME_REQ_SUBMITTED) {
				/*
				 * Didn't finish, do it the slow way
				 * (restore async completion).
				 */
				req->callback = nvme_disk_callback;
				lockmgr(&comq->lk, LK_RELEASE);
			} else {
				/*
				 * Jeeze, that was fast.
				 */
				nvme_disk_callback(req, &comq->lk);
				lockmgr(&comq->lk, LK_RELEASE);
			}
		} /* else async completion */
	} else if (nobytes) {
		devstat_end_transaction_buf(&nsc->stats, bp);
		biodone(bio);
	} else {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		devstat_end_transaction_buf(&nsc->stats, bp);
		biodone(bio);
	}
	return 0;

	/*
	 * No requests were available, requeue the bio.
	 *
	 * The nvme_get_request() call armed the requeue signal but
	 * it is possible that it was picked up too quickly.  If it
	 * was, signal the admin thread ourselves.  This case will occur
	 * relatively rarely and only under heavy I/O conditions so we
	 * don't have to be entirely efficient about dealing with it.
	 */
requeue:
	BUF_KERNPROC(bp);
	lockmgr(&nsc->lk, LK_EXCLUSIVE);
	bioqdisksort(&nsc->bioq, bio);
	lockmgr(&nsc->lk, LK_RELEASE);
	if (atomic_swap_int(&subq->signal_requeue, 1) == 0) {
		atomic_swap_int(&subq->signal_requeue, 0);
		atomic_set_int(&subq->sc->admin_signal, ADMIN_SIG_REQUEUE);
		wakeup(&subq->sc->admin_signal);
	}
	return 1;
}
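
/*
 * Request completion callback.  Normally entered from completion queue
 * processing with the comq lock held; the lock is dropped while the
 * request is returned and the bio completed, so biodone() does not run
 * under the completion lock, then reacquired before returning to the
 * caller.
 */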
static
void
nvme_disk_callback(nvme_request_t *req, struct lock *lk)
{
	nvme_softns_t *nsc = req->nsc;
	struct bio *bio;
	struct buf *bp;
	int status;

	status = NVME_COMQ_STATUS_CODE_GET(req->res.tail.status);
	bio = req->bio;
	bp = bio->bio_buf;

	if (lk)					/* comq lock */
		lockmgr(lk, LK_RELEASE);
	nvme_put_request(req);			/* does not need subq lock */
	devstat_end_transaction_buf(&nsc->stats, bp);
	if (status) {
		bp->b_error = EIO;
		bp->b_flags |= B_ERROR;
		biodone(bio);
	} else {
		bp->b_resid = 0;
		biodone(bio);
	}
	if (lk)					/* comq lock */
		lockmgr(lk, LK_EXCLUSIVE);
}

int
nvme_alloc_disk_unit(void)
{
	static int unit_counter = 0;
	int unit;

	unit = atomic_fetchadd_int(&unit_counter, 1);

	return unit;
}
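
/*
 * Kernel crash-dump support.  Runs from the panic/dump path, so the
 * request is submitted and then waited on synchronously (no
 * interrupt-driven callback).  The dump framework signals completion
 * with a final zero-length call, at which point the controller is
 * shut down.
 */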
static int
nvme_dump(struct dev_dump_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	nvme_softns_t *nsc = dev->si_drv1;
	nvme_softc_t *sc = nsc->sc;
	uint64_t nlba;
	uint64_t secno;
	nvme_subqueue_t *subq;
	nvme_comqueue_t *comq;
	nvme_request_t *req;

	/*
	 * Calculate sector/extent
	 */
	secno = ap->a_offset / nsc->blksize;
	nlba = ap->a_length / nsc->blksize;

	subq = &sc->subqueues[sc->qmap[mycpuid][NVME_QMAP_WR]];

	if (nlba) {
		/*
		 * Issue a write.
		 *
		 * get_request does not need the subq lock.
		 */
		req = nvme_get_request(subq, NVME_IOCMD_WRITE,
				       ap->a_virtual, nlba * nsc->blksize);
		req->cmd.write.head.nsid = nsc->nsid;
		req->cmd.write.start_lba = secno;
		req->cmd.write.count_lba = nlba - 1;	/* 0's based */
	} else {
		/*
		 * Issue a flush.
		 *
		 * get_request does not need the subq lock.
		 */
		req = nvme_get_request(subq, NVME_IOCMD_FLUSH, NULL, 0);
		req->cmd.flush.head.nsid = nsc->nsid;
	}

	/*
	 * Prevent callback from occurring if the synchronous
	 * delay optimization is enabled.
	 */
	req->callback = NULL;
	req->nsc = nsc;
	lockmgr(&subq->lk, LK_EXCLUSIVE);
	nvme_submit_request(req);	/* needs subq lock */
	lockmgr(&subq->lk, LK_RELEASE);

	comq = req->comq;
	nvme_wait_request(req, 1);
	nvme_put_request(req);		/* does not need subq lock */

	/*
	 * Shut the nvme controller down nicely when we finish the dump.
	 */
	if (ap->a_length == 0)
		nvme_issue_shutdown(sc);
	return 0;
}