cifs: Build the RDMA SGE list directly from an iterator

In the depths of the cifs RDMA code, extract part of an iov iterator
directly into an SGE list without going through an intermediate
scatterlist.

Note that this doesn't support extraction from an IOBUF- or UBUF-type
iterator (ie. user-supplied buffer).  The assumption is that the higher
layers will extract those to a BVEC-type iterator first and do whatever is
required to stop the pages from going away.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Tom Talpey <tom@talpey.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: linux-rdma@vger.kernel.org

Link: https://lore.kernel.org/r/166697260361.61150.5064013393408112197.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732032518.3186319.1859601819981624629.stgit@warthog.procyon.org.uk/ # rfc
Signed-off-by: Steve French <stfrench@microsoft.com>
This commit is contained in:
David Howells 2023-01-25 13:34:38 +00:00 committed by Steve French
parent d08089f649
commit 3d78fe73fa
2 changed files with 63 additions and 93 deletions

View File: fs/cifs/smbdirect.c

@@ -828,16 +828,16 @@ static int smbd_post_send(struct smbd_connection *info,
return rc; return rc;
} }
static int smbd_post_send_sgl(struct smbd_connection *info, static int smbd_post_send_iter(struct smbd_connection *info,
struct scatterlist *sgl, int data_length, int remaining_data_length) struct iov_iter *iter,
int *_remaining_data_length)
{ {
int num_sgs;
int i, rc; int i, rc;
int header_length; int header_length;
int data_length;
struct smbd_request *request; struct smbd_request *request;
struct smbd_data_transfer *packet; struct smbd_data_transfer *packet;
int new_credits; int new_credits;
struct scatterlist *sg;
wait_credit: wait_credit:
/* Wait for send credits. A SMBD packet needs one credit */ /* Wait for send credits. A SMBD packet needs one credit */
@@ -881,6 +881,30 @@ wait_send_queue:
} }
request->info = info; request->info = info;
memset(request->sge, 0, sizeof(request->sge));
/* Fill in the data payload to find out how much data we can add */
if (iter) {
struct smb_extract_to_rdma extract = {
.nr_sge = 1,
.max_sge = SMBDIRECT_MAX_SEND_SGE,
.sge = request->sge,
.device = info->id->device,
.local_dma_lkey = info->pd->local_dma_lkey,
.direction = DMA_TO_DEVICE,
};
rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length,
&extract);
if (rc < 0)
goto err_dma;
data_length = rc;
request->num_sge = extract.nr_sge;
*_remaining_data_length -= data_length;
} else {
data_length = 0;
request->num_sge = 1;
}
/* Fill in the packet header */ /* Fill in the packet header */
packet = smbd_request_payload(request); packet = smbd_request_payload(request);
@@ -902,7 +926,7 @@ wait_send_queue:
else else
packet->data_offset = cpu_to_le32(24); packet->data_offset = cpu_to_le32(24);
packet->data_length = cpu_to_le32(data_length); packet->data_length = cpu_to_le32(data_length);
packet->remaining_data_length = cpu_to_le32(remaining_data_length); packet->remaining_data_length = cpu_to_le32(*_remaining_data_length);
packet->padding = 0; packet->padding = 0;
log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
@@ -918,7 +942,6 @@ wait_send_queue:
if (!data_length) if (!data_length)
header_length = offsetof(struct smbd_data_transfer, padding); header_length = offsetof(struct smbd_data_transfer, padding);
request->num_sge = 1;
request->sge[0].addr = ib_dma_map_single(info->id->device, request->sge[0].addr = ib_dma_map_single(info->id->device,
(void *)packet, (void *)packet,
header_length, header_length,
@@ -932,23 +955,6 @@ wait_send_queue:
request->sge[0].length = header_length; request->sge[0].length = header_length;
request->sge[0].lkey = info->pd->local_dma_lkey; request->sge[0].lkey = info->pd->local_dma_lkey;
/* Fill in the packet data payload */
num_sgs = sgl ? sg_nents(sgl) : 0;
for_each_sg(sgl, sg, num_sgs, i) {
request->sge[i+1].addr =
ib_dma_map_page(info->id->device, sg_page(sg),
sg->offset, sg->length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(
info->id->device, request->sge[i+1].addr)) {
rc = -EIO;
request->sge[i+1].addr = 0;
goto err_dma;
}
request->sge[i+1].length = sg->length;
request->sge[i+1].lkey = info->pd->local_dma_lkey;
request->num_sge++;
}
rc = smbd_post_send(info, request); rc = smbd_post_send(info, request);
if (!rc) if (!rc)
return 0; return 0;
@@ -987,8 +993,10 @@ err_wait_credit:
*/ */
static int smbd_post_send_empty(struct smbd_connection *info) static int smbd_post_send_empty(struct smbd_connection *info)
{ {
int remaining_data_length = 0;
info->count_send_empty++; info->count_send_empty++;
return smbd_post_send_sgl(info, NULL, 0, 0); return smbd_post_send_iter(info, NULL, &remaining_data_length);
} }
/* /*
@@ -1934,42 +1942,6 @@ out:
return rc; return rc;
} }
/*
* Send the contents of an iterator
* @iter: The iterator to send
* @_remaining_data_length: remaining data to send in this payload
*/
static int smbd_post_send_iter(struct smbd_connection *info,
struct iov_iter *iter,
int *_remaining_data_length)
{
struct scatterlist sgl[SMBDIRECT_MAX_SEND_SGE - 1];
unsigned int max_payload = info->max_send_size - sizeof(struct smbd_data_transfer);
ssize_t rc;
/* We're not expecting a user-backed iter */
WARN_ON(iov_iter_extract_will_pin(iter));
do {
struct sg_table sgtable = { .sgl = sgl };
size_t maxlen = min_t(size_t, *_remaining_data_length, max_payload);
sg_init_table(sgtable.sgl, ARRAY_SIZE(sgl));
rc = netfs_extract_iter_to_sg(iter, maxlen,
&sgtable, ARRAY_SIZE(sgl), 0);
if (rc < 0)
break;
if (WARN_ON_ONCE(sgtable.nents == 0))
return -EIO;
sg_mark_end(&sgl[sgtable.nents - 1]);
*_remaining_data_length -= rc;
rc = smbd_post_send_sgl(info, sgl, rc, *_remaining_data_length);
} while (rc == 0 && iov_iter_count(iter) > 0);
return rc;
}
/* /*
* Send data to transport * Send data to transport
* Each rqst is transported as a SMBDirect payload * Each rqst is transported as a SMBDirect payload
@@ -2130,10 +2102,10 @@ static void destroy_mr_list(struct smbd_connection *info)
cancel_work_sync(&info->mr_recovery_work); cancel_work_sync(&info->mr_recovery_work);
list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { list_for_each_entry_safe(mr, tmp, &info->mr_list, list) {
if (mr->state == MR_INVALIDATED) if (mr->state == MR_INVALIDATED)
ib_dma_unmap_sg(info->id->device, mr->sgl, ib_dma_unmap_sg(info->id->device, mr->sgt.sgl,
mr->sgl_count, mr->dir); mr->sgt.nents, mr->dir);
ib_dereg_mr(mr->mr); ib_dereg_mr(mr->mr);
kfree(mr->sgl); kfree(mr->sgt.sgl);
kfree(mr); kfree(mr);
} }
} }
@@ -2169,11 +2141,10 @@ static int allocate_mr_list(struct smbd_connection *info)
info->mr_type, info->max_frmr_depth); info->mr_type, info->max_frmr_depth);
goto out; goto out;
} }
smbdirect_mr->sgl = kcalloc( smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth,
info->max_frmr_depth, sizeof(struct scatterlist),
sizeof(struct scatterlist), GFP_KERNEL);
GFP_KERNEL); if (!smbdirect_mr->sgt.sgl) {
if (!smbdirect_mr->sgl) {
log_rdma_mr(ERR, "failed to allocate sgl\n"); log_rdma_mr(ERR, "failed to allocate sgl\n");
ib_dereg_mr(smbdirect_mr->mr); ib_dereg_mr(smbdirect_mr->mr);
goto out; goto out;
@@ -2192,7 +2163,7 @@ out:
list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
list_del(&smbdirect_mr->list); list_del(&smbdirect_mr->list);
ib_dereg_mr(smbdirect_mr->mr); ib_dereg_mr(smbdirect_mr->mr);
kfree(smbdirect_mr->sgl); kfree(smbdirect_mr->sgt.sgl);
kfree(smbdirect_mr); kfree(smbdirect_mr);
} }
return -ENOMEM; return -ENOMEM;
@@ -2246,22 +2217,20 @@ again:
/* /*
* Transcribe the pages from an iterator into an MR scatterlist. * Transcribe the pages from an iterator into an MR scatterlist.
* @iter: The iterator to transcribe
* @_remaining_data_length: remaining data to send in this payload
*/ */
static int smbd_iter_to_mr(struct smbd_connection *info, static int smbd_iter_to_mr(struct smbd_connection *info,
struct iov_iter *iter, struct iov_iter *iter,
struct scatterlist *sgl, struct sg_table *sgt,
unsigned int num_pages) unsigned int max_sg)
{ {
struct sg_table sgtable = { .sgl = sgl };
int ret; int ret;
sg_init_table(sgl, num_pages); memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));
ret = netfs_extract_iter_to_sg(iter, iov_iter_count(iter), ret = netfs_extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
&sgtable, num_pages, 0);
WARN_ON(ret < 0); WARN_ON(ret < 0);
if (sgt->nents > 0)
sg_mark_end(&sgt->sgl[sgt->nents - 1]);
return ret; return ret;
} }
@@ -2298,25 +2267,27 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
smbdirect_mr->dir = dir; smbdirect_mr->dir = dir;
smbdirect_mr->need_invalidate = need_invalidate; smbdirect_mr->need_invalidate = need_invalidate;
smbdirect_mr->sgl_count = num_pages; smbdirect_mr->sgt.nents = 0;
smbdirect_mr->sgt.orig_nents = 0;
log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx\n", log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n",
num_pages, iov_iter_count(iter)); num_pages, iov_iter_count(iter), info->max_frmr_depth);
smbd_iter_to_mr(info, iter, smbdirect_mr->sgl, num_pages); smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth);
rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgl, num_pages, dir); rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl,
smbdirect_mr->sgt.nents, dir);
if (!rc) { if (!rc) {
log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
num_pages, dir, rc); num_pages, dir, rc);
goto dma_map_error; goto dma_map_error;
} }
rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgl, num_pages, rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl,
NULL, PAGE_SIZE); smbdirect_mr->sgt.nents, NULL, PAGE_SIZE);
if (rc != num_pages) { if (rc != smbdirect_mr->sgt.nents) {
log_rdma_mr(ERR, log_rdma_mr(ERR,
"ib_map_mr_sg failed rc = %d num_pages = %x\n", "ib_map_mr_sg failed rc = %d nents = %x\n",
rc, num_pages); rc, smbdirect_mr->sgt.nents);
goto map_mr_error; goto map_mr_error;
} }
@@ -2348,8 +2319,8 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
/* If all failed, attempt to recover this MR by setting it MR_ERROR*/ /* If all failed, attempt to recover this MR by setting it MR_ERROR*/
map_mr_error: map_mr_error:
ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgl, ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl,
smbdirect_mr->sgl_count, smbdirect_mr->dir); smbdirect_mr->sgt.nents, smbdirect_mr->dir);
dma_map_error: dma_map_error:
smbdirect_mr->state = MR_ERROR; smbdirect_mr->state = MR_ERROR;
@@ -2416,8 +2387,8 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
if (smbdirect_mr->state == MR_INVALIDATED) { if (smbdirect_mr->state == MR_INVALIDATED) {
ib_dma_unmap_sg( ib_dma_unmap_sg(
info->id->device, smbdirect_mr->sgl, info->id->device, smbdirect_mr->sgt.sgl,
smbdirect_mr->sgl_count, smbdirect_mr->sgt.nents,
smbdirect_mr->dir); smbdirect_mr->dir);
smbdirect_mr->state = MR_READY; smbdirect_mr->state = MR_READY;
if (atomic_inc_return(&info->mr_ready_count) == 1) if (atomic_inc_return(&info->mr_ready_count) == 1)

View File: fs/cifs/smbdirect.h

@@ -288,8 +288,7 @@ struct smbd_mr {
struct list_head list; struct list_head list;
enum mr_state state; enum mr_state state;
struct ib_mr *mr; struct ib_mr *mr;
struct scatterlist *sgl; struct sg_table sgt;
int sgl_count;
enum dma_data_direction dir; enum dma_data_direction dir;
union { union {
struct ib_reg_wr wr; struct ib_reg_wr wr;