From ba0718a6d67130fd4bff664b86d2aa63fa49e241 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 08:14:46 +0100 Subject: [PATCH 01/22] nvme: don't call blk_mq_{,un}quiesce_tagset when ctrl->tagset is NULL The NVMe drivers support a mode where no tagset is allocated for the I/O queues and only the admin queue is usable. In that case ctrl->tagset is NULL and we must not call the block per-tagset quiesce helpers that dereference it. Fixes: 98d81f0df70c ("nvme: use blk_mq_[un]quiesce_tagset") Reported-by: Gerd Bayer Reported-by: Chao Leng Signed-off-by: Christoph Hellwig Reviewed-by: Chao Leng --- drivers/nvme/host/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3195ae17df30..fd2e26cb7a68 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -5215,6 +5215,8 @@ EXPORT_SYMBOL_GPL(nvme_start_freeze); void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl) { + if (!ctrl->tagset) + return; if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags)) blk_mq_quiesce_tagset(ctrl->tagset); else @@ -5224,6 +5226,8 @@ EXPORT_SYMBOL_GPL(nvme_quiesce_io_queues); void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl) { + if (!ctrl->tagset) + return; if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags)) blk_mq_unquiesce_tagset(ctrl->tagset); } From 99722c8aa8b994db15ef60965c33f6a8e399b36c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 24 Nov 2022 22:28:12 +0100 Subject: [PATCH 02/22] nvme: use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Signed-off-by: Christophe JAILLET Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- drivers/nvme/target/configfs.c | 17 +++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bd5fcdc9211c..e0da4a6719a7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -2184,7 +2185,7 @@ static ssize_t hmb_store(struct device *dev, struct device_attribute *attr, bool new; int ret; - if (strtobool(buf, &new) < 0) + if (kstrtobool(buf, &new) < 0) return -EINVAL; if (new == ndev->hmb) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d48deb9bdb27..682d88e5e43a 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -4,6 +4,7 @@ * Copyright (c) 2015-2016 HGST, a Western Digital Company. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -267,7 +268,7 @@ static ssize_t nvmet_param_pi_enable_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); bool val; - if (strtobool(page, &val)) + if (kstrtobool(page, &val)) return -EINVAL; if (nvmet_is_port_enabled(port, __func__)) @@ -532,7 +533,7 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item, bool enable; int ret = 0; - if (strtobool(page, &enable)) + if (kstrtobool(page, &enable)) return -EINVAL; if (enable) @@ -556,7 +557,7 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item, struct nvmet_ns *ns = to_nvmet_ns(item); bool val; - if (strtobool(page, &val)) + if (kstrtobool(page, &val)) return -EINVAL; mutex_lock(&ns->subsys->lock); @@ -579,7 +580,7 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, struct nvmet_ns *ns = to_nvmet_ns(item); bool val; - if (strtobool(page, &val)) + if (kstrtobool(page, &val)) return -EINVAL; if (!val) @@ -728,7 +729,7 @@ static ssize_t nvmet_passthru_enable_store(struct config_item *item, bool enable; int ret = 0; - if (strtobool(page, &enable)) + if (kstrtobool(page, &enable)) return -EINVAL; if (enable) @@ -995,7 +996,7 @@ static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item, bool allow_any_host; int ret = 0; - if (strtobool(page, &allow_any_host)) + if (kstrtobool(page, &allow_any_host)) return -EINVAL; down_write(&nvmet_config_sem); @@ -1382,7 +1383,7 @@ static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, struct nvmet_subsys *subsys = to_subsys(item); bool pi_enable; - if (strtobool(page, &pi_enable)) + if (kstrtobool(page, &pi_enable)) return -EINVAL; subsys->pi_support = pi_enable; @@ -1511,7 +1512,7 @@ static ssize_t nvmet_referral_enable_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); bool enable; - if (strtobool(page, &enable)) + if (kstrtobool(page, &enable)) goto inval; if (enable) From 6887fc6495f2dfd55e088c982e983815278ee453 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 3 Oct 2022 12:43:43 +0300 Subject: [PATCH 03/22] nvme: introduce nvme_start_request In preparation for nvme-multipath IO stats accounting, we want the accounting to happen in a centralized place. The request completion is already centralized, but we need a common helper to request I/O start. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Hannes Reinecke --- drivers/nvme/host/apple.c | 2 +- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/loop.c | 2 +- 7 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index cab69516af5b..94ef797e8b4a 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -763,7 +763,7 @@ static blk_status_t apple_nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto out_free_cmd; } - blk_mq_start_request(req); + nvme_start_request(req); apple_nvme_submit_cmd(q, cmnd); return BLK_STS_OK; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index aa5fb56c07d9..489f5e797204 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2733,7 +2733,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, atomic_set(&op->state, FCPOP_STATE_ACTIVE); if (!(op->flags & FCOP_FLAGS_AEN)) - blk_mq_start_request(op->rq); + nvme_start_request(op->rq); cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn)); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b3a1c595d144..8522d6dc93e8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1012,6 +1012,11 @@ static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl) } #endif +static inline void nvme_start_request(struct request *rq) +{ + blk_mq_start_request(rq); +} + static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl) { return ctrl->sgls & ((1 << 0) | (1 << 1)); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e0da4a6719a7..ac734c8f6640 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -907,7 +907,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) goto out_unmap_data; } - blk_mq_start_request(req); + nvme_start_request(req); return BLK_STS_OK; out_unmap_data: nvme_unmap_data(dev, req); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index de591cdf78f3..448abf8cdf1f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2040,7 +2040,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) goto unmap_qe; - blk_mq_start_request(rq); + nvme_start_request(rq); if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && queue->pi_support && diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 776b8d9dfca7..79789daddeac 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2405,7 +2405,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(ret)) return ret; - blk_mq_start_request(rq); + nvme_start_request(rq); nvme_tcp_queue_request(req, true, bd->last); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 4173099ef9a4..6d176621f46d 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -145,7 +145,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; - blk_mq_start_request(req); + nvme_start_request(req); iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; iod->req.port = queue->ctrl->port; if (!nvmet_req_init(&iod->req, &queue->nvme_cq, From d4d957b53d91eebc8c681c480edfdc697e55231e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 29 Nov 2022 15:43:19 +0100 Subject: [PATCH 04/22] nvme-multipath: support io stats on the mpath device Our mpath stack device is just a shim that selects a bottom namespace and submits the bio to it without any fancy splitting. This also means that we don't clone the bio or have any context to the bio beyond submission. However it really sucks that we don't see the mpath device io stats. Given that the mpath device can't do that without adding some context to it, we let the bottom device do it on its behalf (somewhat similar to the approach taken in nvme_trace_bio_complete). When the IO starts, we account the request for multipath IO stats using REQ_NVME_MPATH_IO_STATS nvme_request flag to avoid queue io stats disable in the middle of the request. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/multipath.c | 26 ++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 14 ++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd2e26cb7a68..7961e146bbb1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -384,6 +384,8 @@ static inline void nvme_end_req(struct request *req) nvme_log_error(req); nvme_end_req_zoned(req); nvme_trace_bio_complete(req); + if (req->cmd_flags & REQ_NVME_MPATH) + nvme_mpath_end_request(req); blk_mq_end_request(req, status); } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0ea7e441e080..898921ee6cb6 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -114,6 +114,31 @@ void nvme_failover_req(struct request *req) kblockd_schedule_work(&ns->head->requeue_work); } +void nvme_mpath_start_request(struct request *rq) +{ + struct nvme_ns *ns = rq->q->queuedata; + struct gendisk *disk = ns->head->disk; + + if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq)) + return; + + nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, + blk_rq_bytes(rq) >> SECTOR_SHIFT, + req_op(rq), jiffies); +} +EXPORT_SYMBOL_GPL(nvme_mpath_start_request); + +void nvme_mpath_end_request(struct request *rq) +{ + struct nvme_ns *ns = rq->q->queuedata; + + if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) + return; + bdev_end_io_acct(ns->head->disk->part0, req_op(rq), + nvme_req(rq)->start_time); +} + void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; @@ -503,6 +528,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue); /* * This assumes all controllers that refer to a namespace either * support poll queues or not. That is not a strict guarantee, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8522d6dc93e8..e01fa9f696fa 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -162,6 +162,9 @@ struct nvme_request { u8 retries; u8 flags; u16 status; +#ifdef CONFIG_NVME_MULTIPATH + unsigned long start_time; +#endif struct nvme_ctrl *ctrl; }; @@ -173,6 +176,7 @@ struct nvme_request { enum { NVME_REQ_CANCELLED = (1 << 0), NVME_REQ_USERCMD = (1 << 1), + NVME_MPATH_IO_STATS = (1 << 2), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -882,6 +886,8 @@ bool nvme_mpath_clear_current_path(struct nvme_ns *ns); void nvme_mpath_revalidate_paths(struct nvme_ns *ns); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); +void nvme_mpath_start_request(struct request *rq); +void nvme_mpath_end_request(struct request *rq); static inline void nvme_trace_bio_complete(struct request *req) { @@ -967,6 +973,12 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) static inline void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys) { } +static inline void nvme_mpath_start_request(struct request *rq) +{ +} +static inline void nvme_mpath_end_request(struct request *rq) +{ +} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_revalidate_zones(struct nvme_ns *ns); @@ -1014,6 +1026,8 @@ static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl) static inline void nvme_start_request(struct request *rq) { + if (rq->cmd_flags & REQ_NVME_MPATH) + nvme_mpath_start_request(rq); blk_mq_start_request(rq); } From ea43fceea4171f29457f8d46543ec320b777c1c7 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Mon, 5 Dec 2022 15:27:46 +0100 Subject: [PATCH 05/22] nvme: allow unprivileged passthrough of Identify Controller Add unprivileged passthrough of the I/O Command Set Independent and I/O Command Set Specific Identify Controller sub-command. This will allow access to attributes (e.g. MDTS and WZSL) that are needed to effectively form passthrough I/O to the /dev/ng* character devices. Signed-off-by: Joel Granados Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 9550a69029b3..9ddda571f046 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -34,6 +34,8 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, case NVME_ID_CNS_NS: case NVME_ID_CNS_CS_NS: case NVME_ID_CNS_NS_CS_INDEP: + case NVME_ID_CNS_CS_CTRL: + case NVME_ID_CNS_CTRL: return true; } } From 6c90294d72a99e3ed51516124ba0984e28937d2d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 30 Nov 2022 13:33:40 -0800 Subject: [PATCH 06/22] nvme-fc: avoid null pointer dereference Before using dynamically allcoated variable lsop in the nvme_fc_rcv_ls_req(), add a check for NULL and error out early. Signed-off-by: Chaitanya Kulkarni Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 489f5e797204..06bae7150087 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1754,9 +1754,18 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, } lsop = kzalloc(sizeof(*lsop), GFP_KERNEL); + if (!lsop) { + dev_info(lport->dev, + "RCV %s LS failed: No memory\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -ENOMEM; + goto out_put; + } + lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL); lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL); - if (!lsop || !lsop->rqstbuf || !lsop->rspbuf) { + if (!lsop->rqstbuf || !lsop->rspbuf) { dev_info(lport->dev, "RCV %s LS failed: No memory\n", (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? From b2969585572e5ddaa239d53ee68e2824df7a7736 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 30 Nov 2022 13:33:41 -0800 Subject: [PATCH 07/22] nvme-fc: move common code into helper Add a helper to move the duplicate code for error message from nvme_fc_rcv_ls_req() to nvme_fc_rcv_ls_req_err_msg(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 06bae7150087..42fa450187f8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1701,6 +1701,15 @@ restart: spin_unlock_irqrestore(&rport->lock, flags); } +static +void nvme_fc_rcv_ls_req_err_msg(struct nvme_fc_lport *lport, + struct fcnvme_ls_rqst_w0 *w0) +{ + dev_info(lport->dev, "RCV %s LS failed: No memory\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); +} + /** * nvme_fc_rcv_ls_req - transport entry point called by an LLDD * upon the reception of a NVME LS request. @@ -1755,10 +1764,7 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, lsop = kzalloc(sizeof(*lsop), GFP_KERNEL); if (!lsop) { - dev_info(lport->dev, - "RCV %s LS failed: No memory\n", - (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? - nvmefc_ls_names[w0->ls_cmd] : ""); + nvme_fc_rcv_ls_req_err_msg(lport, w0); ret = -ENOMEM; goto out_put; } @@ -1766,10 +1772,7 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL); lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL); if (!lsop->rqstbuf || !lsop->rspbuf) { - dev_info(lport->dev, - "RCV %s LS failed: No memory\n", - (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? - nvmefc_ls_names[w0->ls_cmd] : ""); + nvme_fc_rcv_ls_req_err_msg(lport, w0); ret = -ENOMEM; goto out_free; } From c76b8308e4c9148e44e0c7e086ab6d8b4bb10162 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 11:14:09 +0100 Subject: [PATCH 08/22] nvme-apple: fix controller shutdown in apple_nvme_disable nvme_shutdown_ctrl already shuts the controller down, there is no need to also call nvme_disable_ctrl for the shutdown case. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Eric Curtin Reviewed-by: Sagi Grimberg Reviewed-by: Hector Martin --- drivers/nvme/host/apple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 94ef797e8b4a..56d9e9be945b 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -831,7 +831,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) if (shutdown) nvme_shutdown_ctrl(&anv->ctrl); - nvme_disable_ctrl(&anv->ctrl); + else + nvme_disable_ctrl(&anv->ctrl); } WRITE_ONCE(anv->ioq.enabled, false); From e6d275de2e4aaebdc97aa022c031a0dcc475efa4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 08:54:26 +0100 Subject: [PATCH 09/22] nvme: use nvme_wait_ready in nvme_shutdown_ctrl Refactor the code to wait for CSTS state changes so that it can be reused by nvme_shutdown_ctrl. This reduces the delay between each iteration that checks CSTS from 100ms in the shutdown code to the 1 to 2ms range done during enable, matching the changes from commit 3e98c2443f5c that were only applied to the enable/disable path. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Pankaj Raghav --- drivers/nvme/host/core.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7961e146bbb1..03b2e34dcf72 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2252,16 +2252,17 @@ static const struct block_device_operations nvme_bdev_ops = { .pr_ops = &nvme_pr_ops, }; -static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled) +static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 mask, u32 val, + u32 timeout, const char *op) { - unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies; - u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; + unsigned long timeout_jiffies = jiffies + timeout * HZ; + u32 csts; int ret; while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { if (csts == ~0) return -ENODEV; - if ((csts & NVME_CSTS_RDY) == bit) + if ((csts & mask) == val) break; usleep_range(1000, 2000); @@ -2270,7 +2271,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled) if (time_after(jiffies, timeout_jiffies)) { dev_err(ctrl->device, "Device not ready; aborting %s, CSTS=0x%x\n", - enabled ? "initialisation" : "reset", csts); + op, csts); return -ENODEV; } } @@ -2297,8 +2298,8 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl) if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); - - return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false); + return nvme_wait_ready(ctrl, NVME_CSTS_RDY, 0, + (NVME_CAP_TIMEOUT(ctrl->cap) + 1) / 2, "reset"); } EXPORT_SYMBOL_GPL(nvme_disable_ctrl); @@ -2363,14 +2364,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; - return nvme_wait_ready(ctrl, timeout, true); + return nvme_wait_ready(ctrl, NVME_CSTS_RDY, NVME_CSTS_RDY, + (timeout + 1) / 2, "initialisation"); } EXPORT_SYMBOL_GPL(nvme_enable_ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) { - unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ); - u32 csts; int ret; ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; @@ -2379,22 +2379,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; - - while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { - if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT) - break; - - msleep(100); - if (fatal_signal_pending(current)) - return -EINTR; - if (time_after(jiffies, timeout)) { - dev_err(ctrl->device, - "Device shutdown incomplete; abort shutdown\n"); - return -ENODEV; - } - } - - return ret; + return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK, NVME_CSTS_SHST_CMPLT, + ctrl->shutdown_timeout, "shutdown"); } EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl); From 285b6e9b571714270fa503d0e32f244d15b9f85f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2022 11:20:12 +0100 Subject: [PATCH 10/22] nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl Many of the callers decide which one to use based on a bool argument and there is at least some code to be shared, so merge these two. Also move a comment specific to a single callsite to that callsite. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Hector Martin --- drivers/nvme/host/apple.c | 5 +---- drivers/nvme/host/core.c | 33 ++++++++++----------------------- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 3 +-- drivers/nvme/host/pci.c | 15 +++++++++------ drivers/nvme/host/rdma.c | 5 +---- drivers/nvme/host/tcp.c | 5 +---- drivers/nvme/target/loop.c | 2 +- 8 files changed, 25 insertions(+), 45 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 56d9e9be945b..e36aeb50b4ed 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -829,10 +829,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) apple_nvme_remove_cq(anv); } - if (shutdown) - nvme_shutdown_ctrl(&anv->ctrl); - else - nvme_disable_ctrl(&anv->ctrl); + nvme_disable_ctrl(&anv->ctrl, shutdown); } WRITE_ONCE(anv->ioq.enabled, false); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 03b2e34dcf72..30717f7cfc94 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2279,23 +2279,25 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 mask, u32 val, return ret; } -/* - * If the device has been passed off to us in an enabled state, just clear - * the enabled bit. The spec says we should set the 'shutdown notification - * bits', but doing so may cause the device to complete commands to the - * admin queue ... and we don't know what memory that might be pointing at! - */ -int nvme_disable_ctrl(struct nvme_ctrl *ctrl) +int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { int ret; ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; - ctrl->ctrl_config &= ~NVME_CC_ENABLE; + if (shutdown) + ctrl->ctrl_config |= NVME_CC_SHN_NORMAL; + else + ctrl->ctrl_config &= ~NVME_CC_ENABLE; ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; + if (shutdown) { + return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK, + NVME_CSTS_SHST_CMPLT, + ctrl->shutdown_timeout, "shutdown"); + } if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); return nvme_wait_ready(ctrl, NVME_CSTS_RDY, 0, @@ -2369,21 +2371,6 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_enable_ctrl); -int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) -{ - int ret; - - ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; - ctrl->ctrl_config |= NVME_CC_SHN_NORMAL; - - ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); - if (ret) - return ret; - return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK, NVME_CSTS_SHST_CMPLT, - ctrl->shutdown_timeout, "shutdown"); -} -EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl); - static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) { __le64 ts; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 42fa450187f8..bb89c7f7196a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2518,7 +2518,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * Other transports, which don't have link-level contexts bound * to sqe's, would try to gracefully shutdown the controller by * writing the registers for shutdown and polling (call - * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * nvme_disable_ctrl()). Given a bunch of i/o was potentially * just aborted and we will wait on those contexts, and given * there was no indication of how live the controlelr is on the * link, don't send more io to create more contexts for the diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index e01fa9f696fa..2cad9f6f2282 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -733,9 +733,8 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); -int nvme_disable_ctrl(struct nvme_ctrl *ctrl); +int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); -int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, const struct nvme_ctrl_ops *ops, unsigned long quirks); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ac734c8f6640..84226dce9b3b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1500,11 +1500,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; - if (shutdown) - nvme_shutdown_ctrl(&dev->ctrl); - else - nvme_disable_ctrl(&dev->ctrl); - + nvme_disable_ctrl(&dev->ctrl, shutdown); nvme_poll_irqdisable(nvmeq); } @@ -1819,7 +1815,14 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); - result = nvme_disable_ctrl(&dev->ctrl); + /* + * If the device has been passed off to us in an enabled state, just + * clear the enabled bit. The spec says we should set the 'shutdown + * notification bits', but doing so may cause the device to complete + * commands to the admin queue ... and we don't know what memory that + * might be pointing at! + */ + result = nvme_disable_ctrl(&dev->ctrl, false); if (result < 0) return result; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 448abf8cdf1f..cc61a1b8311b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2208,10 +2208,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { nvme_rdma_teardown_io_queues(ctrl, shutdown); nvme_quiesce_admin_queue(&ctrl->ctrl); - if (shutdown) - nvme_shutdown_ctrl(&ctrl->ctrl); - else - nvme_disable_ctrl(&ctrl->ctrl); + nvme_disable_ctrl(&ctrl->ctrl, shutdown); nvme_rdma_teardown_admin_queue(ctrl, shutdown); } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 79789daddeac..95e54e9c1bb1 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2142,10 +2142,7 @@ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { nvme_tcp_teardown_io_queues(ctrl, shutdown); nvme_quiesce_admin_queue(ctrl); - if (shutdown) - nvme_shutdown_ctrl(ctrl); - else - nvme_disable_ctrl(ctrl); + nvme_disable_ctrl(ctrl, shutdown); nvme_tcp_teardown_admin_queue(ctrl, shutdown); } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 6d176621f46d..0015aed5c169 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -401,7 +401,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) nvme_quiesce_admin_queue(&ctrl->ctrl); if (ctrl->ctrl.state == NVME_CTRL_LIVE) - nvme_shutdown_ctrl(&ctrl->ctrl); + nvme_disable_ctrl(&ctrl->ctrl, true); nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); From 47d42d229a181e52b32fa9c80aaa36cf8b6b46fc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 08:59:55 +0100 Subject: [PATCH 11/22] nvme-pci: remove nvme_disable_admin_queue nvme_disable_admin_queue has only a single caller, and just calls two other funtions, so remove it to clean up the remove path a little more. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Eric Curtin Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 84226dce9b3b..c6a02210d22b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1496,14 +1496,6 @@ static void nvme_suspend_io_queues(struct nvme_dev *dev) nvme_suspend_queue(&dev->queues[i]); } -static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) -{ - struct nvme_queue *nvmeq = &dev->queues[0]; - - nvme_disable_ctrl(&dev->ctrl, shutdown); - nvme_poll_irqdisable(nvmeq); -} - /* * Called only on a device that has been disabled and after all other threads * that can check this device's completion queues have synced, except @@ -2711,7 +2703,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (!dead && dev->ctrl.queue_count > 0) { nvme_disable_io_queues(dev); - nvme_disable_admin_queue(dev, shutdown); + nvme_disable_ctrl(&dev->ctrl, shutdown); + nvme_poll_irqdisable(&dev->queues[0]); } nvme_suspend_io_queues(dev); nvme_suspend_queue(&dev->queues[0]); From c80767f770ed722264688f65c89e420971eb897a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 09:01:30 +0100 Subject: [PATCH 12/22] nvme-pci: remove nvme_pci_disable nvme_pci_disable has a single caller, fold it into that. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Eric Curtin Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c6a02210d22b..c3d9b23699d3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2657,18 +2657,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev) pci_release_mem_regions(to_pci_dev(dev->dev)); } -static void nvme_pci_disable(struct nvme_dev *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - - pci_free_irq_vectors(pdev); - - if (pci_is_enabled(pdev)) { - pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); - } -} - static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { bool dead = true, freeze = false; @@ -2708,7 +2696,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) } nvme_suspend_io_queues(dev); nvme_suspend_queue(&dev->queues[0]); - nvme_pci_disable(dev); + pci_free_irq_vectors(pdev); + if (pci_is_enabled(pdev)) { + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + } nvme_reap_pending_cqes(dev); nvme_cancel_tagset(&dev->ctrl); From 10981f23a1b865290a5e21ad43a0f93710271cc6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Nov 2022 10:51:05 +0100 Subject: [PATCH 13/22] nvme-pci: cleanup nvme_suspend_queue Remove the unused returne value, pass a dev + qid instead of the queue as that is better for the callers as well as the function itself, and remove the entirely pointless kerneldoc comment. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c3d9b23699d3..2c8dd0f7ef46 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1468,14 +1468,12 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) } } -/** - * nvme_suspend_queue - put queue into suspended state - * @nvmeq: queue to suspend - */ -static int nvme_suspend_queue(struct nvme_queue *nvmeq) +static void nvme_suspend_queue(struct nvme_dev *dev, unsigned int qid) { + struct nvme_queue *nvmeq = &dev->queues[qid]; + if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags)) - return 1; + return; /* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */ mb(); @@ -1484,8 +1482,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) nvme_quiesce_admin_queue(&nvmeq->dev->ctrl); if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) - pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); - return 0; + pci_free_irq(to_pci_dev(dev->dev), nvmeq->cq_vector, nvmeq); } static void nvme_suspend_io_queues(struct nvme_dev *dev) @@ -1493,7 +1490,7 @@ static void nvme_suspend_io_queues(struct nvme_dev *dev) int i; for (i = dev->ctrl.queue_count - 1; i > 0; i--) - nvme_suspend_queue(&dev->queues[i]); + nvme_suspend_queue(dev, i); } /* @@ -2695,7 +2692,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_poll_irqdisable(&dev->queues[0]); } nvme_suspend_io_queues(dev); - nvme_suspend_queue(&dev->queues[0]); + nvme_suspend_queue(dev, 0); pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { pci_disable_pcie_error_reporting(pdev); From 7d879c90ae6cf58f4a4caad8562d700b95bc1dbe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 09:07:48 +0100 Subject: [PATCH 14/22] nvme-pci: rename nvme_disable_io_queues This function really deletes the I/O queues, so rename it to match the functionality. Also move the main wrapper right next to the actual underlying implementation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2c8dd0f7ef46..1b527377e351 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -109,7 +109,7 @@ struct nvme_dev; struct nvme_queue; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); -static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode); +static void nvme_delete_io_queues(struct nvme_dev *dev); /* * Represents an NVM Express device. Each nvme_dev is a PCI function. @@ -2310,12 +2310,6 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); } -static void nvme_disable_io_queues(struct nvme_dev *dev) -{ - if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq)) - __nvme_disable_io_queues(dev, nvme_admin_delete_cq); -} - static unsigned int nvme_max_io_queues(struct nvme_dev *dev) { /* @@ -2423,7 +2417,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (dev->online_queues - 1 < dev->max_qid) { nr_io_queues = dev->online_queues - 1; - nvme_disable_io_queues(dev); + nvme_delete_io_queues(dev); result = nvme_setup_io_queues_trylock(dev); if (result) return result; @@ -2487,7 +2481,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) +static bool __nvme_delete_io_queues(struct nvme_dev *dev, u8 opcode) { int nr_queues = dev->online_queues - 1, sent = 0; unsigned long timeout; @@ -2515,6 +2509,12 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) return true; } +static void nvme_delete_io_queues(struct nvme_dev *dev) +{ + if (__nvme_delete_io_queues(dev, nvme_admin_delete_sq)) + __nvme_delete_io_queues(dev, nvme_admin_delete_cq); +} + static void nvme_pci_alloc_tag_set(struct nvme_dev *dev) { struct blk_mq_tag_set * set = &dev->tagset; @@ -2687,7 +2687,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_quiesce_io_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { - nvme_disable_io_queues(dev); + nvme_delete_io_queues(dev); nvme_disable_ctrl(&dev->ctrl, shutdown); nvme_poll_irqdisable(&dev->queues[0]); } From 8cb9f10b7151e308824cdcf3168010d673e7888c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Nov 2022 08:48:13 +0100 Subject: [PATCH 15/22] nvme-pci: return early on ctrl state mismatch in nvme_reset_work The only way nvme_reset_work could be called when not in resetting state is if a reset and remove happen near the same time. This should not happen, but if it did we don't want the reset work to disable the controller because the remove is already doing that. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1b527377e351..02940b4f42b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2792,8 +2792,7 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.state != NVME_CTRL_RESETTING) { dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", dev->ctrl.state); - result = -ENODEV; - goto out; + return; } /* From 68e81eba6763cd834aa8ff9ac0cd6174fa69fa39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2022 09:09:48 +0100 Subject: [PATCH 16/22] nvme-pci: split out a nvme_pci_ctrl_is_dead helper Clean up nvme_dev_disable by splitting the logic to detect if a controller is dead into a separate helper. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 47 ++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 02940b4f42b1..d613b4292c0f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2654,36 +2654,39 @@ static void nvme_dev_unmap(struct nvme_dev *dev) pci_release_mem_regions(to_pci_dev(dev->dev)); } +static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts; + + if (!pci_is_enabled(pdev) || !pci_device_is_present(pdev)) + return true; + if (pdev->error_state != pci_channel_io_normal) + return true; + + csts = readl(dev->bar + NVME_REG_CSTS); + return (csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY); +} + static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - bool dead = true, freeze = false; struct pci_dev *pdev = to_pci_dev(dev->dev); + bool dead; mutex_lock(&dev->shutdown_lock); - if (pci_is_enabled(pdev)) { - u32 csts; - - if (pci_device_is_present(pdev)) - csts = readl(dev->bar + NVME_REG_CSTS); - else - csts = ~0; - - if (dev->ctrl.state == NVME_CTRL_LIVE || - dev->ctrl.state == NVME_CTRL_RESETTING) { - freeze = true; + dead = nvme_pci_ctrl_is_dead(dev); + if (dev->ctrl.state == NVME_CTRL_LIVE || + dev->ctrl.state == NVME_CTRL_RESETTING) { + if (pci_is_enabled(pdev)) nvme_start_freeze(&dev->ctrl); - } - dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || - pdev->error_state != pci_channel_io_normal); + /* + * Give the controller a chance to complete all entered requests + * if doing a safe shutdown. + */ + if (!dead && shutdown) + nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); } - /* - * Give the controller a chance to complete all entered requests if - * doing a safe shutdown. - */ - if (!dead && shutdown && freeze) - nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); - nvme_quiesce_io_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { From dcef77274ae52136925287b6b59d5c6e6a4adfb9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:16:52 +0100 Subject: [PATCH 17/22] nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set Don't look at ctrl->ops as only RDMA and TCP actually support multiple maps. Fixes: 6dfba1c09c10 ("nvme-fc: use the tagset alloc/free helpers") Fixes: ceee1953f923 ("nvme-loop: use the tagset alloc/free helpers") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 5 ++--- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/rdma.c | 4 +++- drivers/nvme/host/tcp.c | 1 + drivers/nvme/target/loop.c | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 30717f7cfc94..3b369900928f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4891,7 +4891,7 @@ EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set); int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int flags, - unsigned int cmd_size) + unsigned int nr_maps, unsigned int cmd_size) { int ret; @@ -4905,8 +4905,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, set->driver_data = ctrl; set->nr_hw_queues = ctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; - if (ops->map_queues) - set->nr_maps = ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; + set->nr_maps = nr_maps; ret = blk_mq_alloc_tag_set(set); if (ret) return ret; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index bb89c7f7196a..1a4e009ca542 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2916,7 +2916,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, - &nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE, + &nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE, 1, struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz)); if (ret) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2cad9f6f2282..6c4565435fd9 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -747,7 +747,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl); int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int flags, - unsigned int cmd_size); + unsigned int nr_maps, unsigned int cmd_size); void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index cc61a1b8311b..cf8f500405b1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -798,7 +798,9 @@ static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl) NVME_RDMA_METADATA_SGL_SIZE; return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set, - &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, cmd_size); + &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, + ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, + cmd_size); } static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 95e54e9c1bb1..fa245a50f630 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1859,6 +1859,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, &nvme_tcp_mq_ops, BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING, + ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, sizeof(struct nvme_tcp_request)); if (ret) goto out_free_io_queues; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 0015aed5c169..da32727e6232 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -494,7 +494,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) return ret; ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, - &nvme_loop_mq_ops, BLK_MQ_F_SHOULD_MERGE, + &nvme_loop_mq_ops, BLK_MQ_F_SHOULD_MERGE, 1, sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (ret) From db45e1a5ddccc034eb60d62fc5352022d7963ae2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:19:50 +0100 Subject: [PATCH 18/22] nvme: consolidate setting the tagset flags All nvme transports should be using the same flags for their tagsets, with the exception for the blocking flag that should only be set for transports that can block in ->queue_rq. Add a NVME_F_BLOCKING flag to nvme_ctrl_ops to control the blocking behavior and lift setting the flags into nvme_alloc_{admin,io}_tag_set. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 15 +++++++++------ drivers/nvme/host/fc.c | 4 ++-- drivers/nvme/host/nvme.h | 9 +++++---- drivers/nvme/host/rdma.c | 3 +-- drivers/nvme/host/tcp.c | 5 ++--- drivers/nvme/target/loop.c | 4 ++-- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3b369900928f..f31586c46893 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4831,8 +4831,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, EXPORT_SYMBOL_GPL(nvme_complete_async_event); int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, - const struct blk_mq_ops *ops, unsigned int flags, - unsigned int cmd_size) + const struct blk_mq_ops *ops, unsigned int cmd_size) { int ret; @@ -4842,7 +4841,9 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ctrl->ops->flags & NVME_F_FABRICS) set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; - set->flags = flags; + set->flags = BLK_MQ_F_NO_SCHED; + if (ctrl->ops->flags & NVME_F_BLOCKING) + set->flags |= BLK_MQ_F_BLOCKING; set->cmd_size = cmd_size; set->driver_data = ctrl; set->nr_hw_queues = 1; @@ -4890,8 +4891,8 @@ void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl) EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set); int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, - const struct blk_mq_ops *ops, unsigned int flags, - unsigned int nr_maps, unsigned int cmd_size) + const struct blk_mq_ops *ops, unsigned int nr_maps, + unsigned int cmd_size) { int ret; @@ -4900,7 +4901,9 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, set->queue_depth = ctrl->sqsize + 1; set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; - set->flags = flags; + set->flags = BLK_MQ_F_SHOULD_MERGE; + if (ctrl->ops->flags & NVME_F_BLOCKING) + set->flags |= BLK_MQ_F_BLOCKING; set->cmd_size = cmd_size, set->driver_data = ctrl; set->nr_hw_queues = ctrl->queue_count - 1; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1a4e009ca542..4564f16a0b20 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2916,7 +2916,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, - &nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE, 1, + &nvme_fc_mq_ops, 1, struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz)); if (ret) @@ -3522,7 +3522,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, nvme_fc_init_queue(ctrl, 0); ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, - &nvme_fc_admin_mq_ops, BLK_MQ_F_NO_SCHED, + &nvme_fc_admin_mq_ops, struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz)); if (ret) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6c4565435fd9..6bbb73ef8b25 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -512,6 +512,8 @@ struct nvme_ctrl_ops { unsigned int flags; #define NVME_F_FABRICS (1 << 0) #define NVME_F_METADATA_SUPPORTED (1 << 1) +#define NVME_F_BLOCKING (1 << 2) + const struct attribute_group **dev_attr_groups; int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); @@ -742,12 +744,11 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended); int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, - const struct blk_mq_ops *ops, unsigned int flags, - unsigned int cmd_size); + const struct blk_mq_ops *ops, unsigned int cmd_size); void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl); int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, - const struct blk_mq_ops *ops, unsigned int flags, - unsigned int nr_maps, unsigned int cmd_size); + const struct blk_mq_ops *ops, unsigned int nr_maps, + unsigned int cmd_size); void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index cf8f500405b1..bbad26b82b56 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -798,7 +798,7 @@ static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl) NVME_RDMA_METADATA_SGL_SIZE; return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set, - &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, + &nvme_rdma_mq_ops, ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, cmd_size); } @@ -848,7 +848,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (new) { error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops, - BLK_MQ_F_NO_SCHED, sizeof(struct nvme_rdma_request) + NVME_RDMA_DATA_SGL_SIZE); if (error) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index fa245a50f630..f07b1fccaf19 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1858,7 +1858,6 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) if (new) { ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, &nvme_tcp_mq_ops, - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING, ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, sizeof(struct nvme_tcp_request)); if (ret) @@ -1934,7 +1933,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) if (new) { error = nvme_alloc_admin_tag_set(ctrl, &to_tcp_ctrl(ctrl)->admin_tag_set, - &nvme_tcp_admin_mq_ops, BLK_MQ_F_BLOCKING, + &nvme_tcp_admin_mq_ops, sizeof(struct nvme_tcp_request)); if (error) goto out_free_queue; @@ -2512,7 +2511,7 @@ static const struct blk_mq_ops nvme_tcp_admin_mq_ops = { static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .name = "tcp", .module = THIS_MODULE, - .flags = NVME_F_FABRICS, + .flags = NVME_F_FABRICS | NVME_F_BLOCKING, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index da32727e6232..f2d24b2d992f 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -353,7 +353,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->ctrl.queue_count = 1; error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, - &nvme_loop_admin_mq_ops, BLK_MQ_F_NO_SCHED, + &nvme_loop_admin_mq_ops, sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (error) @@ -494,7 +494,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) return ret; ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, - &nvme_loop_mq_ops, BLK_MQ_F_SHOULD_MERGE, 1, + &nvme_loop_mq_ops, 1, sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (ret) From b794d1c2ad6d7921f2867ce393815ad31b5b5a83 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:27:07 +0100 Subject: [PATCH 19/22] nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers The reserved_tags are only needed for fabrics controllers. Right now only fabrics drivers call this helper, so this is harmless, but we'll use it in the PCIe driver soon. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f31586c46893..ed163c539767 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4899,7 +4899,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->queue_depth = ctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; + if (ctrl->ops->flags & NVME_F_FABRICS) + set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) From 93b24f579c392bac2e491fee79ad5ce5a131992e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:28:48 +0100 Subject: [PATCH 20/22] nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set Add the apple shared tag workaround to nvme_alloc_io_tag_set to prepare for using that helper in the PCIe driver. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ed163c539767..9854f5dbcf4a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4899,7 +4899,13 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; set->queue_depth = ctrl->sqsize + 1; - if (ctrl->ops->flags & NVME_F_FABRICS) + /* + * Some Apple controllers requires tags to be unique across admin and + * the (only) I/O queue, so reserve the first 32 tags of the I/O queue. + */ + if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS) + set->reserved_tags = NVME_AQ_DEPTH; + else if (ctrl->ops->flags & NVME_F_FABRICS) set->reserved_tags = NVMF_RESERVED_TAGS; set->numa_node = ctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; From 0da7feaa5913090a2b97177ed3d7fa07b2d1d99c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2022 17:26:25 +0100 Subject: [PATCH 21/22] nvme-pci: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_dev. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 89 +++++++++-------------------------------- 1 file changed, 18 insertions(+), 71 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d613b4292c0f..f005e8569266 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -398,7 +398,7 @@ static int nvme_pci_npages_sgl(void) static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = to_nvme_dev(data); struct nvme_queue *nvmeq = &dev->queues[0]; WARN_ON(hctx_idx != 0); @@ -411,7 +411,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = to_nvme_dev(data); struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); @@ -423,7 +423,7 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_dev *dev = set->driver_data; + struct nvme_dev *dev = to_nvme_dev(set->driver_data); struct nvme_iod *iod = blk_mq_rq_to_pdu(req); nvme_req(req)->ctrl = &dev->ctrl; @@ -442,7 +442,7 @@ static int queue_irq_offset(struct nvme_dev *dev) static void nvme_pci_map_queues(struct blk_mq_tag_set *set) { - struct nvme_dev *dev = set->driver_data; + struct nvme_dev *dev = to_nvme_dev(set->driver_data); int i, qoff, offset; offset = queue_irq_offset(dev); @@ -1728,39 +1728,10 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * queue to flush these to completion. */ nvme_unquiesce_admin_queue(&dev->ctrl); - blk_mq_destroy_queue(dev->ctrl.admin_q); - blk_put_queue(dev->ctrl.admin_q); - blk_mq_free_tag_set(&dev->admin_tagset); + nvme_remove_admin_tag_set(&dev->ctrl); } } -static int nvme_pci_alloc_admin_tag_set(struct nvme_dev *dev) -{ - struct blk_mq_tag_set *set = &dev->admin_tagset; - - set->ops = &nvme_mq_admin_ops; - set->nr_hw_queues = 1; - - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->timeout = NVME_ADMIN_TIMEOUT; - set->numa_node = dev->ctrl.numa_node; - set->cmd_size = sizeof(struct nvme_iod); - set->flags = BLK_MQ_F_NO_SCHED; - set->driver_data = dev; - - if (blk_mq_alloc_tag_set(set)) - return -ENOMEM; - dev->ctrl.admin_tagset = set; - - dev->ctrl.admin_q = blk_mq_init_queue(set); - if (IS_ERR(dev->ctrl.admin_q)) { - blk_mq_free_tag_set(set); - dev->ctrl.admin_q = NULL; - return -ENOMEM; - } - return 0; -} - static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) { return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride); @@ -2515,40 +2486,13 @@ static void nvme_delete_io_queues(struct nvme_dev *dev) __nvme_delete_io_queues(dev, nvme_admin_delete_cq); } -static void nvme_pci_alloc_tag_set(struct nvme_dev *dev) +static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev) { - struct blk_mq_tag_set * set = &dev->tagset; - int ret; - - set->ops = &nvme_mq_ops; - set->nr_hw_queues = dev->online_queues - 1; - set->nr_maps = 1; - if (dev->io_queues[HCTX_TYPE_READ]) - set->nr_maps = 2; if (dev->io_queues[HCTX_TYPE_POLL]) - set->nr_maps = 3; - set->timeout = NVME_IO_TIMEOUT; - set->numa_node = dev->ctrl.numa_node; - set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; - set->cmd_size = sizeof(struct nvme_iod); - set->flags = BLK_MQ_F_SHOULD_MERGE; - set->driver_data = dev; - - /* - * Some Apple controllers requires tags to be unique - * across admin and IO queue, so reserve the first 32 - * tags of the IO queue. - */ - if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) - set->reserved_tags = NVME_AQ_DEPTH; - - ret = blk_mq_alloc_tag_set(set); - if (ret) { - dev_warn(dev->ctrl.device, - "IO queues tagset allocation failed %d\n", ret); - return; - } - dev->ctrl.tagset = set; + return 3; + if (dev->io_queues[HCTX_TYPE_READ]) + return 2; + return 1; } static void nvme_pci_update_nr_queues(struct nvme_dev *dev) @@ -2770,7 +2714,7 @@ static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) static void nvme_free_tagset(struct nvme_dev *dev) { if (dev->tagset.tags) - blk_mq_free_tag_set(&dev->tagset); + nvme_remove_io_tag_set(&dev->ctrl); dev->ctrl.tagset = NULL; } @@ -3101,7 +3045,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto out_release_iod_mempool; - result = nvme_pci_alloc_admin_tag_set(dev); + result = nvme_alloc_admin_tag_set(&dev->ctrl, &dev->admin_tagset, + &nvme_mq_admin_ops, sizeof(struct nvme_iod)); if (result) goto out_disable; @@ -3131,12 +3076,14 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_disable; if (dev->online_queues > 1) { - nvme_pci_alloc_tag_set(dev); + nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops, + nvme_pci_nr_maps(dev), sizeof(struct nvme_iod)); nvme_dbbuf_set(dev); - } else { - dev_warn(dev->ctrl.device, "IO queues not created\n"); } + if (!dev->ctrl.tagset) + dev_warn(dev->ctrl.device, "IO queues not created\n"); + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { dev_warn(dev->ctrl.device, "failed to mark controller live state\n"); From 19b00e0069a3819eac0e0ea685899aba29e545d6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 7 Dec 2022 13:28:23 +0200 Subject: [PATCH 22/22] nvmet: don't open-code NVME_NS_ATTR_RO enumeration It is already there, just go ahead and use it. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6b46f90a63cf..53a004ea320c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -561,7 +561,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) } if (req->ns->readonly) - id->nsattr |= (1 << 0); + id->nsattr |= NVME_NS_ATTR_RO; done: if (!status) status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));