linux/net/core/devmem.c
Mina Almasry 170aafe35c netdev: support binding dma-buf to netdevice
Add a netdev_dmabuf_binding struct which represents the
dma-buf-to-netdevice binding. The netlink API will bind the dma-buf to
rx queues on the netdevice. On the binding, the dma_buf_attach
& dma_buf_map_attachment will occur. The entries in the sg_table from
mapping will be inserted into a genpool to make it ready
for allocation.

The chunks in the genpool are owned by a dmabuf_chunk_owner struct which
holds the dma-buf offset of the base of the chunk and the dma_addr of
the chunk. Both are needed to use allocations that come from this chunk.

We create a new type that represents an allocation from the genpool:
net_iov. We setup the net_iov allocation size in the
genpool to PAGE_SIZE for simplicity: to match the PAGE_SIZE normally
allocated by the page pool and given to the drivers.

The user can unbind the dmabuf from the netdevice by closing the netlink
socket that established the binding. We do this so that the binding is
automatically unbound even if the userspace process crashes.

The binding and unbinding leaves an indicator in struct netdev_rx_queue
that the given queue is bound, and the binding is actuated by resetting
the rx queue using the queue API.

The netdev_dmabuf_binding struct is refcounted, and releases its
resources only when all the refs are released.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com> # excluding netlink
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20240910171458.219195-4-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-09-11 20:44:31 -07:00

275 lines
6.6 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Devmem TCP
*
* Authors: Mina Almasry <almasrymina@google.com>
* Willem de Bruijn <willemdebruijn.kernel@gmail.com>
* Kaiyuan Zhang <kaiyuanz@google.com
*/
#include <linux/dma-buf.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <trace/events/page_pool.h>
#include "devmem.h"
/* Device memory support */
/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
struct gen_pool_chunk *chunk,
void *not_used)
{
struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
kvfree(owner->niovs);
kfree(owner);
}
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
{
size_t size, avail;
gen_pool_for_each_chunk(binding->chunk_pool,
net_devmem_dmabuf_free_chunk_owner, NULL);
size = gen_pool_size(binding->chunk_pool);
avail = gen_pool_avail(binding->chunk_pool);
if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
size, avail))
gen_pool_destroy(binding->chunk_pool);
dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
DMA_FROM_DEVICE);
dma_buf_detach(binding->dmabuf, binding->attachment);
dma_buf_put(binding->dmabuf);
xa_destroy(&binding->bound_rxqs);
kfree(binding);
}
void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
struct netdev_rx_queue *rxq;
unsigned long xa_idx;
unsigned int rxq_idx;
if (binding->list.next)
list_del(&binding->list);
xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
WARN_ON(rxq->mp_params.mp_priv != binding);
rxq->mp_params.mp_priv = NULL;
rxq_idx = get_netdev_rx_queue_index(rxq);
WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
}
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
net_devmem_dmabuf_binding_put(binding);
}
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
struct net_devmem_dmabuf_binding *binding,
struct netlink_ext_ack *extack)
{
struct netdev_rx_queue *rxq;
u32 xa_idx;
int err;
if (rxq_idx >= dev->real_num_rx_queues) {
NL_SET_ERR_MSG(extack, "rx queue index out of range");
return -ERANGE;
}
rxq = __netif_get_rx_queue(dev, rxq_idx);
if (rxq->mp_params.mp_priv) {
NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
return -EEXIST;
}
#ifdef CONFIG_XDP_SOCKETS
if (rxq->pool) {
NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
return -EBUSY;
}
#endif
err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
GFP_KERNEL);
if (err)
return err;
rxq->mp_params.mp_priv = binding;
err = netdev_rx_queue_restart(dev, rxq_idx);
if (err)
goto err_xa_erase;
return 0;
err_xa_erase:
rxq->mp_params.mp_priv = NULL;
xa_erase(&binding->bound_rxqs, xa_idx);
return err;
}
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
struct netlink_ext_ack *extack)
{
struct net_devmem_dmabuf_binding *binding;
static u32 id_alloc_next;
struct scatterlist *sg;
struct dma_buf *dmabuf;
unsigned int sg_idx, i;
unsigned long virtual;
int err;
dmabuf = dma_buf_get(dmabuf_fd);
if (IS_ERR(dmabuf))
return ERR_CAST(dmabuf);
binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
dev_to_node(&dev->dev));
if (!binding) {
err = -ENOMEM;
goto err_put_dmabuf;
}
binding->dev = dev;
err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
binding, xa_limit_32b, &id_alloc_next,
GFP_KERNEL);
if (err < 0)
goto err_free_binding;
xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);
refcount_set(&binding->ref, 1);
binding->dmabuf = dmabuf;
binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
if (IS_ERR(binding->attachment)) {
err = PTR_ERR(binding->attachment);
NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
goto err_free_id;
}
binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
DMA_FROM_DEVICE);
if (IS_ERR(binding->sgt)) {
err = PTR_ERR(binding->sgt);
NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
goto err_detach;
}
/* For simplicity we expect to make PAGE_SIZE allocations, but the
* binding can be much more flexible than that. We may be able to
* allocate MTU sized chunks here. Leave that for future work...
*/
binding->chunk_pool =
gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
if (!binding->chunk_pool) {
err = -ENOMEM;
goto err_unmap;
}
virtual = 0;
for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
dma_addr_t dma_addr = sg_dma_address(sg);
struct dmabuf_genpool_chunk_owner *owner;
size_t len = sg_dma_len(sg);
struct net_iov *niov;
owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
dev_to_node(&dev->dev));
if (!owner) {
err = -ENOMEM;
goto err_free_chunks;
}
owner->base_virtual = virtual;
owner->base_dma_addr = dma_addr;
owner->num_niovs = len / PAGE_SIZE;
owner->binding = binding;
err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
dma_addr, len, dev_to_node(&dev->dev),
owner);
if (err) {
kfree(owner);
err = -EINVAL;
goto err_free_chunks;
}
owner->niovs = kvmalloc_array(owner->num_niovs,
sizeof(*owner->niovs),
GFP_KERNEL);
if (!owner->niovs) {
err = -ENOMEM;
goto err_free_chunks;
}
for (i = 0; i < owner->num_niovs; i++) {
niov = &owner->niovs[i];
niov->owner = owner;
}
virtual += len;
}
return binding;
err_free_chunks:
gen_pool_for_each_chunk(binding->chunk_pool,
net_devmem_dmabuf_free_chunk_owner, NULL);
gen_pool_destroy(binding->chunk_pool);
err_unmap:
dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
DMA_FROM_DEVICE);
err_detach:
dma_buf_detach(dmabuf, binding->attachment);
err_free_id:
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
err_free_binding:
kfree(binding);
err_put_dmabuf:
dma_buf_put(dmabuf);
return ERR_PTR(err);
}
void dev_dmabuf_uninstall(struct net_device *dev)
{
struct net_devmem_dmabuf_binding *binding;
struct netdev_rx_queue *rxq;
unsigned long xa_idx;
unsigned int i;
for (i = 0; i < dev->real_num_rx_queues; i++) {
binding = dev->_rx[i].mp_params.mp_priv;
if (!binding)
continue;
xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
if (rxq == &dev->_rx[i]) {
xa_erase(&binding->bound_rxqs, xa_idx);
break;
}
}
}