From 4b324362d54967a3ccfad8a9b113978ea1b38feb Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Mon, 13 Feb 2017 14:44:28 +0100 Subject: [PATCH] 8628 nvme: use a semaphore to guard submission queue Reviewed by: Jerry Jelinek Reviewed by: Jason King Reviewed by: Robert Mustacchi Reviewed by: Garrett D'Amore Approved by: Richard Lowe --- usr/src/uts/common/io/nvme/nvme.c | 107 ++++++++++++++-------------------- usr/src/uts/common/io/nvme/nvme_var.h | 4 +- 2 files changed, 46 insertions(+), 65 deletions(-) diff --git a/usr/src/uts/common/io/nvme/nvme.c b/usr/src/uts/common/io/nvme/nvme.c index b148635e59..bcbe672041 100644 --- a/usr/src/uts/common/io/nvme/nvme.c +++ b/usr/src/uts/common/io/nvme/nvme.c @@ -44,7 +44,7 @@ * * Command Processing: * - * NVMe devices can have up to 65536 I/O queue pairs, with each queue holding up + * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up * to 65536 I/O commands. The driver will configure one I/O queue pair per * available interrupt vector, with the queue length usually much smaller than * the maximum of 65536. If the hardware doesn't provide enough queues, fewer @@ -69,7 +69,8 @@ * wraps around in that time a submission may find the next array slot to still * be used by a long-running command. In this case the array is sequentially * searched for the next free slot. The length of the command array is the same - * as the configured queue length. + * as the configured queue length. Queue overrun is prevented by the semaphore, + * so a command submission may block if the queue is full. * * * Polled I/O Support: @@ -257,7 +258,9 @@ static void nvme_free_cmd(nvme_cmd_t *); static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t, bd_xfer_t *); static int nvme_admin_cmd(nvme_cmd_t *, int); -static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *); +static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *); +static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *); +static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *); static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *); static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t); static void nvme_wakeup_cmd(void *); @@ -271,7 +274,7 @@ static int nvme_check_generic_cmd_status(nvme_cmd_t *); static inline int nvme_check_cmd_status(nvme_cmd_t *); static void nvme_abort_cmd(nvme_cmd_t *); -static int nvme_async_event(nvme_t *); +static void nvme_async_event(nvme_t *); static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t, boolean_t, uint8_t); static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...); @@ -721,6 +724,7 @@ nvme_free_qpair(nvme_qpair_t *qp) int i; mutex_destroy(&qp->nq_mutex); + sema_destroy(&qp->nq_sema); if (qp->nq_sqdma != NULL) nvme_free_dma(qp->nq_sqdma); @@ -746,6 +750,7 @@ nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp, mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER, DDI_INTR_PRI(nvme->n_intr_pri)); + sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL); if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t), DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS) @@ -812,18 +817,29 @@ nvme_free_cmd(nvme_cmd_t *cmd) kmem_cache_free(nvme_cmd_cache, cmd); } +static void +nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd) +{ + sema_p(&qp->nq_sema); + nvme_submit_cmd_common(qp, cmd); +} + static int -nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd) +nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd) { - nvme_reg_sqtdbl_t tail = { 0 }; + if (sema_tryp(&qp->nq_sema) == 0) 
+ return (EAGAIN); - mutex_enter(&qp->nq_mutex); + nvme_submit_cmd_common(qp, cmd); + return (0); +} - if (qp->nq_active_cmds == qp->nq_nentry) { - mutex_exit(&qp->nq_mutex); - return (DDI_FAILURE); - } +static void +nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd) +{ + nvme_reg_sqtdbl_t tail = { 0 }; + mutex_enter(&qp->nq_mutex); cmd->nc_completed = B_FALSE; /* @@ -849,7 +865,6 @@ nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd) nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r); mutex_exit(&qp->nq_mutex); - return (DDI_SUCCESS); } static nvme_cmd_t * @@ -895,6 +910,7 @@ nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp) nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r); mutex_exit(&qp->nq_mutex); + sema_v(&qp->nq_sema); return (cmd); } @@ -1363,7 +1379,6 @@ nvme_async_event_task(void *arg) nvme_health_log_t *health_log = NULL; size_t logsize = 0; nvme_async_event_t event; - int ret; /* * Check for errors associated with the async request itself. The only @@ -1397,14 +1412,7 @@ nvme_async_event_task(void *arg) /* Clear CQE and re-submit the async request. */ bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t)); - ret = nvme_submit_cmd(nvme->n_adminq, cmd); - - if (ret != DDI_SUCCESS) { - dev_err(nvme->n_dip, CE_WARN, - "!failed to resubmit async event request"); - atomic_inc_32(&nvme->n_async_resubmit_failed); - nvme_free_cmd(cmd); - } + nvme_submit_admin_cmd(nvme->n_adminq, cmd); switch (event.b.ae_type) { case NVME_ASYNC_TYPE_ERROR: @@ -1517,19 +1525,8 @@ nvme_async_event_task(void *arg) static int nvme_admin_cmd(nvme_cmd_t *cmd, int sec) { - int ret; - mutex_enter(&cmd->nc_mutex); - ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd); - - if (ret != DDI_SUCCESS) { - mutex_exit(&cmd->nc_mutex); - dev_err(cmd->nc_nvme->n_dip, CE_WARN, - "!nvme_submit_cmd failed"); - atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full); - nvme_free_cmd(cmd); - return (DDI_FAILURE); - } + nvme_submit_admin_cmd(cmd->nc_nvme->n_adminq, cmd); if (nvme_wait_cmd(cmd, sec) == B_FALSE) { /* @@ -1543,26 +1540,16 @@ nvme_admin_cmd(nvme_cmd_t *cmd, int sec) return (DDI_SUCCESS); } -static int +static void nvme_async_event(nvme_t *nvme) { nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); - int ret; cmd->nc_sqid = 0; cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT; cmd->nc_callback = nvme_async_event_task; - ret = nvme_submit_cmd(nvme->n_adminq, cmd); - - if (ret != DDI_SUCCESS) { - dev_err(nvme->n_dip, CE_WARN, - "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT"); - nvme_free_cmd(cmd); - return (DDI_FAILURE); - } - - return (DDI_SUCCESS); + nvme_submit_admin_cmd(nvme->n_adminq, cmd); } static int @@ -2379,11 +2366,7 @@ nvme_init(nvme_t *nvme) /* * Post an asynchronous event command to catch errors. */ - if (nvme_async_event(nvme) != DDI_SUCCESS) { - dev_err(nvme->n_dip, CE_WARN, - "!failed to post async event"); - goto fail; - } + nvme_async_event(nvme); /* * Identify Controller @@ -2608,13 +2591,8 @@ nvme_init(nvme_t *nvme) * Post more asynchronous events commands to reduce event reporting * latency as suggested by the spec. 
*/ - for (i = 1; i != nvme->n_async_event_limit; i++) { - if (nvme_async_event(nvme) != DDI_SUCCESS) { - dev_err(nvme->n_dip, CE_WARN, - "!failed to post async event %d", i); - goto fail; - } - } + for (i = 1; i != nvme->n_async_event_limit; i++) + nvme_async_event(nvme); return (DDI_SUCCESS); @@ -3278,9 +3256,10 @@ static int nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc) { nvme_t *nvme = ns->ns_nvme; - nvme_cmd_t *cmd, *ret; + nvme_cmd_t *cmd; nvme_qpair_t *ioq; boolean_t poll; + int ret; if (nvme->n_dead) return (EIO); @@ -3300,16 +3279,18 @@ nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc) */ poll = (xfer->x_flags & BD_XFER_POLL) != 0; - if (nvme_submit_cmd(ioq, cmd) != DDI_SUCCESS) - return (EAGAIN); + ret = nvme_submit_io_cmd(ioq, cmd); + + if (ret != 0) + return (ret); if (!poll) return (0); do { - ret = nvme_retrieve_cmd(nvme, ioq); - if (ret != NULL) - nvme_bd_xfer_done(ret); + cmd = nvme_retrieve_cmd(nvme, ioq); + if (cmd != NULL) + nvme_bd_xfer_done(cmd); else drv_usecwait(10); } while (ioq->nq_active_cmds != 0); diff --git a/usr/src/uts/common/io/nvme/nvme_var.h b/usr/src/uts/common/io/nvme/nvme_var.h index 651adaec8c..93f0cae064 100644 --- a/usr/src/uts/common/io/nvme/nvme_var.h +++ b/usr/src/uts/common/io/nvme/nvme_var.h @@ -12,6 +12,7 @@ /* * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 The MathWorks, Inc. All rights reserved. + * Copyright 2017 Joyent, Inc. */ #ifndef _NVME_VAR_H @@ -109,6 +110,7 @@ struct nvme_qpair { int nq_phase; kmutex_t nq_mutex; + ksema_t nq_sema; }; struct nvme { @@ -186,11 +188,9 @@ struct nvme { uint32_t n_abort_failed; uint32_t n_cmd_timeout; uint32_t n_cmd_aborted; - uint32_t n_async_resubmit_failed; uint32_t n_wrong_logpage; uint32_t n_unknown_logpage; uint32_t n_too_many_cookies; - uint32_t n_admin_queue_full; /* errors detected by hardware */ uint32_t n_data_xfr_err; -- 2.11.4.GIT
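
For readers tracing the flow of this change, the sketch below is a stand-alone, user-space analogue of the submission pattern the patch introduces: admin commands block on the queue semaphore (sema_p()), I/O commands try-acquire it and bail out with EAGAIN when the queue is full (sema_tryp()), and the completion path returns the slot (sema_v() in nvme_retrieve_cmd()). This is illustration only, not driver code: POSIX semaphores stand in for the DDI ksema_t interfaces, and the qpair_t, submit_admin_cmd(), submit_io_cmd(), and retrieve_cmd() names are invented for the example.

/*
 * User-space analogue of the semaphore-guarded submission queue.
 * One semaphore token corresponds to one free submission queue entry,
 * mirroring sema_init(&qp->nq_sema, nentry, ...) in the patch.
 */
#include <errno.h>
#include <semaphore.h>
#include <stdio.h>

#define	QUEUE_DEPTH	4

typedef struct {
	sem_t	slots;		/* free submission queue entries */
	int	active;		/* stands in for nq_active_cmds */
} qpair_t;

static void
qpair_init(qpair_t *qp)
{
	/* One token per queue entry. */
	(void) sem_init(&qp->slots, 0, QUEUE_DEPTH);
	qp->active = 0;
}

/* Admin path: block until a slot is free, like sema_p(). */
static void
submit_admin_cmd(qpair_t *qp)
{
	while (sem_wait(&qp->slots) != 0 && errno == EINTR)
		;
	qp->active++;	/* in the driver this is done under nq_mutex */
}

/* I/O path: fail with EAGAIN when the queue is full, like sema_tryp(). */
static int
submit_io_cmd(qpair_t *qp)
{
	if (sem_trywait(&qp->slots) != 0)
		return (EAGAIN);
	qp->active++;
	return (0);
}

/* Completion path: release the slot, like sema_v() after reaping a CQE. */
static void
retrieve_cmd(qpair_t *qp)
{
	qp->active--;
	(void) sem_post(&qp->slots);
}

int
main(void)
{
	qpair_t qp;
	int i;

	qpair_init(&qp);

	/* Admin submission blocks only if the queue is full; here it isn't. */
	submit_admin_cmd(&qp);

	/* Fill the remaining slots; the last I/O submission backs off. */
	for (i = 0; i < QUEUE_DEPTH; i++) {
		int ret = submit_io_cmd(&qp);
		printf("submit %d: %s\n", i, ret == 0 ? "ok" : "EAGAIN");
	}

	/* Completing one command frees a slot for the next submission. */
	retrieve_cmd(&qp);
	printf("after completion: %s\n",
	    submit_io_cmd(&qp) == 0 ? "ok" : "EAGAIN");

	return (0);
}

The point of the semaphore over the old "check nq_active_cmds and return DDI_FAILURE" scheme is visible here: admin submitters can simply sleep until a slot opens rather than failing and cleaning up, while the I/O path keeps its non-blocking behavior and surfaces EAGAIN to blkdev for retry.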