nvme-tcp: try to send request in queue_rq context
Today, nvme-tcp unconditionally schedules a send request to a workqueue context, which is one context switch more than we need when the socket buffer is wide open. However, because we have asynchronous send activity (as a result of r2t, or write_space callbacks), we need to synchronize sends from possibly multiple contexts (ideally all running on the same cpu though).

Thus, we only try to send directly from queue_rq when:

1. the send_list is empty
2. we can send it synchronously (i.e. not from the RX path)
3. we run on the same cpu as queue->io_cpu, to avoid contention on the send operation

Proposed-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
commit db5ad6b7f8 (parent 72e5d757c6)
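The heart of the change is the new queueing decision in nvme_tcp_queue_request(). The sketch below is a condensed, comment-annotated restatement of the hunk in the diff that follows (not a separate implementation); the added comments map the three conditions from the commit message onto the code:

static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
		bool sync)
{
	struct nvme_tcp_queue *queue = req->queue;
	bool empty;

	spin_lock(&queue->lock);
	/* condition 1: nothing queued and no request currently being sent */
	empty = list_empty(&queue->send_list) && !queue->request;
	list_add_tail(&req->entry, &queue->send_list);
	spin_unlock(&queue->lock);

	/*
	 * Send directly only if: (3) we are already on queue->io_cpu,
	 * (2) the caller may send synchronously (i.e. not the RX path),
	 * (1) the queue was empty, and the send path is not already owned
	 * by io_work (send_mutex trylock succeeds). Otherwise fall back
	 * to queueing io_work as before.
	 */
	if (queue->io_cpu == smp_processor_id() &&
	    sync && empty && mutex_trylock(&queue->send_mutex)) {
		nvme_tcp_try_send(queue);
		mutex_unlock(&queue->send_mutex);
	} else {
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	}
}

Because queue_rq can now block while sending on the socket, the tag sets also gain BLK_MQ_F_BLOCKING in the nvme_tcp_alloc_tagset() hunks below.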
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -76,6 +76,7 @@ struct nvme_tcp_queue {
 	int			io_cpu;
 
 	spinlock_t		lock;
+	struct mutex		send_mutex;
 	struct list_head	send_list;
 
 	/* recv state */
@@ -132,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
 static struct workqueue_struct *nvme_tcp_wq;
 static struct blk_mq_ops nvme_tcp_mq_ops;
 static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -258,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
 	}
 }
 
-static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req)
+static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
+		bool sync)
 {
 	struct nvme_tcp_queue *queue = req->queue;
+	bool empty;
 
 	spin_lock(&queue->lock);
+	empty = list_empty(&queue->send_list) && !queue->request;
 	list_add_tail(&req->entry, &queue->send_list);
 	spin_unlock(&queue->lock);
 
-	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	/*
+	 * if we're the first on the send_list and we can try to send
+	 * directly, otherwise queue io_work. Also, only do that if we
+	 * are on the same cpu, so we don't introduce contention.
+	 */
+	if (queue->io_cpu == smp_processor_id() &&
+	    sync && empty && mutex_trylock(&queue->send_mutex)) {
+		nvme_tcp_try_send(queue);
+		mutex_unlock(&queue->send_mutex);
+	} else {
+		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	}
 }
 
 static inline struct nvme_tcp_request *
@@ -579,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 	req->state = NVME_TCP_SEND_H2C_PDU;
 	req->offset = 0;
 
-	nvme_tcp_queue_request(req);
+	nvme_tcp_queue_request(req, false);
 
 	return 0;
 }
@@ -1065,11 +1081,14 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		bool pending = false;
 		int result;
 
-		result = nvme_tcp_try_send(queue);
-		if (result > 0)
-			pending = true;
-		else if (unlikely(result < 0))
-			break;
+		if (mutex_trylock(&queue->send_mutex)) {
+			result = nvme_tcp_try_send(queue);
+			mutex_unlock(&queue->send_mutex);
+			if (result > 0)
+				pending = true;
+			else if (unlikely(result < 0))
+				break;
+		}
 
 		result = nvme_tcp_try_recv(queue);
 		if (result > 0)
@@ -1321,6 +1340,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	queue->ctrl = ctrl;
 	INIT_LIST_HEAD(&queue->send_list);
 	spin_lock_init(&queue->lock);
+	mutex_init(&queue->send_mutex);
 	INIT_WORK(&queue->io_work, nvme_tcp_io_work);
 	queue->queue_size = queue_size;
 
@@ -1545,6 +1565,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		set->reserved_tags = 2; /* connect + keep-alive */
 		set->numa_node = NUMA_NO_NODE;
+		set->flags = BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = 1;
@@ -1556,7 +1577,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->queue_depth = nctrl->sqsize + 1;
 		set->reserved_tags = 1; /* fabric connect */
 		set->numa_node = NUMA_NO_NODE;
-		set->flags = BLK_MQ_F_SHOULD_MERGE;
+		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 		set->cmd_size = sizeof(struct nvme_tcp_request);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = nctrl->queue_count - 1;
@@ -2115,7 +2136,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
 	ctrl->async_req.curr_bio = NULL;
 	ctrl->async_req.data_len = 0;
 
-	nvme_tcp_queue_request(&ctrl->async_req);
+	nvme_tcp_queue_request(&ctrl->async_req, true);
 }
 
 static enum blk_eh_timer_return
@@ -2246,7 +2267,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 
-	nvme_tcp_queue_request(req);
+	nvme_tcp_queue_request(req, true);
 
 	return BLK_STS_OK;
 }